Merge branch 'fix-pipeline' into 'development'

fix: perbaikan file .gitlab-ci.yml dan docker-compose untuk LTI API

See merge request mbugroup/lti-api!33
This commit is contained in:
kris
2025-10-21 16:56:56 +00:00
3546 changed files with 4952571 additions and 107 deletions
+7 -3
View File
@@ -3,9 +3,13 @@ root = "."
tmp_dir = "tmp"
[build]
cmd = "go build -o ./tmp/main ./cmd/api"
bin = "tmp/main"
full_bin = "APP_ENV=dev ./tmp/main"
# Build binary utama
cmd = "go build -o /lti-api/tmp/main ./cmd/api"
# Lokasi binary hasil build
bin = "/lti-api/tmp/main"
# Jalankan binary langsung dengan environment dev
full_bin = "APP_ENV=dev /lti-api/tmp/main"
# File yang dipantau oleh Air
include_ext = ["go", "tpl", "tmpl", "html"]
exclude_dir = ["vendor", "tmp"]
+58
View File
@@ -0,0 +1,58 @@
# .env.lti-api (Development Server with Domain)
# =============================================
# Server configuration
VERSION=0.0.1
APP_ENV=dev
APP_HOST=0.0.0.0
APP_PORT=8081
APP_URL=https://dev-api-lti.mbugroup.id
# Database configuration (pakai PostgreSQL milik SSO)
DB_HOST=sso-postgres
DB_USER=postgres
DB_PASSWORD=postgres
DB_NAME=db_lti_erp
DB_PORT=5432
# JWT configuration
JWT_SECRET=changeme
JWT_ACCESS_EXP_MINUTES=30
JWT_REFRESH_EXP_DAYS=30
JWT_RESET_PASSWORD_EXP_MINUTES=10
JWT_VERIFY_EMAIL_EXP_MINUTES=10
# Redis (pakai Redis milik SSO)
REDIS_URL=redis://sso-redis:6379/0
# CORS configuration
CORS_ALLOW_ORIGINS=https://dev-api-sso.mbugroup.id,https://dev-lti.mbugroup.id,https://dev-api-lti.mbugroup.id,http://localhost:3000
CORS_ALLOW_METHODS=GET,POST,PUT,PATCH,DELETE,OPTIONS
CORS_ALLOW_HEADERS=Authorization,Content-Type,X-Requested-With
CORS_EXPOSE_HEADERS=Link,Location
CORS_ALLOW_CREDENTIALS=true
CORS_MAX_AGE=600
# SSO Integration (Gunakan domain backend SSO)
SSO_ISSUER=https://dev-api-sso.mbugroup.id
SSO_JWKS_URL=https://dev-api-sso.mbugroup.id/api/.well-known/jwks.json
SSO_ALLOWED_AUDIENCES=
SSO_AUTHORIZE_URL=https://dev-api-sso.mbugroup.id/api/sso/authorize
SSO_TOKEN_URL=https://dev-api-sso.mbugroup.id/api/sso/token
SSO_GETME_URL=https://dev-api-sso.mbugroup.id/api/auth/get-me
# Cookie & session configuration
SSO_ACCESS_COOKIE_NAME=sso_access
SSO_REFRESH_COOKIE_NAME=sso_refresh
SSO_COOKIE_DOMAIN=.mbugroup.id
SSO_COOKIE_SECURE=true
SSO_COOKIE_SAMESITE=Lax
SSO_PKCE_TTL_SECONDS=300
# SSO webhook / user sync settings
SSO_USER_SYNC_SIGNATURE_DRIFT_SECONDS=120
SSO_USER_SYNC_NONCE_TTL_SECONDS=600
SSO_USER_SYNC_MAX_BODY_BYTES=32768
# Client registration for SSO
SSO_CLIENTS={"Lumbung-Telur-Indonesia":{"public_id":"Lumbung-Telur-Indonesia","redirect_uri":"https://dev-api-lti.mbugroup.id/api/sso/callback","scope":"openid profile","default_return_uri":"https://dev-lti.mbugroup.id","allowed_return_origins":["https://dev-lti.mbugroup.id","http://localhost:3000"],"sync_secret":"onUyfODIMHOh4TgGLgyWLmsNeVNxFRHqoLJFLPjr"}}
+43 -104
View File
@@ -1,120 +1,59 @@
# --- Load .env if it exists, and export its variables to child shells ---
# $(wildcard .env) expands to an empty string when the file is absent, so the
# include is skipped instead of failing. The bare `export` directive marks all
# make variables for export to the environment of recipe commands.
ifneq (,$(wildcard .env))
include .env
export
endif
# ===============================
# LTI-API Makefile (Docker Setup)
# ===============================
# --- Konfigurasi umum ---
COMPOSE ?= docker compose -f docker-compose.local.yml
NETWORK ?= lti-api_go-network
MIGRATE_IMAGE ?= migrate/migrate
MIGRATIONS_DIR := $(PWD)/internal/database/migrations
APP_NAME := lti-api
COMPOSE := docker compose -f docker-compose.yaml
NETWORK := lti-network
ENV_FILE := .env.lti-api
# Fallback defaults so the targets keep working even when .env is empty.
# ?= assigns a value only when the variable is not already defined.
DB_HOST ?= postgresdb
DB_PORT ?= 5432
DB_USER ?= postgres
DB_PASSWORD ?= postgres
DB_NAME ?= db_lti_erp
# Load every KEY=VALUE line of $(ENV_FILE) as a make variable, then export those
# variables to the environment of recipe commands.
# Only lines that begin with a valid variable name followed by "=" contribute a
# name to the export list; comment lines and blank lines in the env file are
# skipped instead of being handed to `export` as stray words.
include $(ENV_FILE)
export $(shell sed -n '/^[A-Za-z_][A-Za-z0-9_]*=/s/=.*//p' $(ENV_FILE))
DB_URL := postgres://$(DB_USER):$(DB_PASSWORD)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)?sslmode=disable
# Host directory that holds the SQL migrations. It is bind-mounted with
# `docker run -v`, so it is made absolute through $(CURDIR): Docker CLI releases
# before 23.0 reject a relative host path in -v and treat it as a volume name.
MIGRATIONS_DIR := $(CURDIR)/migrations
# Pinned image used to run the migrate commands.
MIGRATE_IMAGE := migrate/migrate:v4.15.2
# Connection URL passed to migrate; host and port are fixed to lti-postgres:5432,
# credentials and database name come from DB_USER, DB_PASSWORD and DB_NAME.
DB_URL := postgres://$(DB_USER):$(DB_PASSWORD)@lti-postgres:5432/$(DB_NAME)?sslmode=disable
# Command that blocks until PostgreSQL accepts connections: it runs pg_isready
# from the postgres:alpine image on $(NETWORK) and retries after a 1-second
# sleep until the check succeeds.
WAIT_DB := docker run --rm --network $(NETWORK) postgres:alpine \
sh -c 'until pg_isready -h $(DB_HOST) -p $(DB_PORT) -U $(DB_USER) -d $(DB_NAME); do echo "waiting for postgres..."; sleep 1; done'
# Default target
.DEFAULT_GOAL := start
# --- Phony targets ---
# Every command-style target is listed here so that a file or directory with the
# same name (for example a ./logs directory) can never make the target look up
# to date. migrate-force, logs, restart and status were previously missing.
# NOTE(review): .PHONY takes literal names, so the "migration-%" entry does not
# cover targets matched by the migration-% pattern rule; it is kept unchanged.
.PHONY: start build test lint gen \
        db-up wait-db \
        migration-% migrate-up migrate-down migrate-fresh migrate-force \
        seed \
        docker-local docker-down docker-nuke docker-cache psql \
        logs restart status
# --- Go workflow ---
start:
@go run cmd/api/main.go
build:
@go build -o tmp/app ./cmd/api
test:
@go test ./test/...
lint:
@golangci-lint run
# --- Compose / DB helpers ---
db-up:
@$(COMPOSE) up -d postgresdb
wait-db:
@$(WAIT_DB)
# --- Migration (file creation) ---
# Runs `migrate create` with the sql extension in internal/database/migrations,
# using the pattern stem as the migration name.
# Example: make migration-create_users_table
# Any ":" in the stem is replaced with "_" so the name is safe as a file name.
migration-%:
@migrate create -ext sql -dir internal/database/migrations $(subst :,_,$*)
# --- Migration (apply via docker image 'migrate') ---
migrate-up: db-up wait-db
@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" up
# Contoh:
# make migrate-down step=2 → rollback 2 step
# make migrate-down → rollback semua
migrate-down: db-up wait-db
@if [ -n "$(step)" ]; then \
echo "⬇️ Migrating down $(step) step(s)..."; \
docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down $(step); \
else \
echo "⬇️ Migrating down ALL steps..."; \
docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down -all; \
fi
migrate-fresh: migrate-down migrate-up
@true
# Usage: make migrate-force v=20250917120000
# Runs the migrate image's `force` command with the version given in `v`,
# against $(DB_URL), with $(MIGRATIONS_DIR) mounted at /migrations.
# NOTE(review): `v` has no default here; an empty value is passed through as-is.
migrate-force:
@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" force $(v)
# --- Seeder ---
seed: db-up wait-db
@$(COMPOSE) run --rm app go run cmd/seed/main.go
# --- Docker orchestration convenience ---
# --- Docker ---
docker-local:
@echo "🚀 Starting $(APP_NAME) with local PostgreSQL & Redis..."
@$(COMPOSE) up --build -d
docker-down:
@$(COMPOSE) down --remove-orphans
# ⚠️ Akan menghapus container, images dan volumes.
docker-nuke:
@echo "💣 Removing all containers, images, and volumes..."
@$(COMPOSE) down --rmi all --volumes --remove-orphans
docker-cache:
@docker builder prune -f
# --- Database / Migration ---
# --- PSQL shell ke DB di container ---
psql: db-up
@$(COMPOSE) exec -it postgresdb psql -U $(DB_USER) -d $(DB_NAME)
wait-db:
@echo "⏳ Waiting for database lti-postgres to be ready (inside Docker network)..."
@$(COMPOSE) run --rm app sh -c 'until nc -z lti-postgres 5432; do echo "Waiting for DB..."; sleep 2; done; echo "✅ Database is ready!"'
# Single feature
# example: make gen feat=product-category
migrate-up: wait-db
@echo "⬆️ Running migrations..."
@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" up
# Sub feature
# make gen feat=master/area
gen:
@go run tools/gen.go $(feat)
# @goimports -w internal
migrate-down: wait-db
@echo "⬇️ Rolling back all migrations..."
@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down -all
seed:
@echo "🌱 Running seed script..."
@$(COMPOSE) run --rm app go run cmd/seed/main.go
psql:
@docker exec -it lti-postgres psql -U $(DB_USER) -d $(DB_NAME)
logs:
@$(COMPOSE) logs -f app
restart:
@$(COMPOSE) restart
status:
@$(COMPOSE) ps
+77
View File
@@ -0,0 +1,77 @@
version: "3.9"
services:
dev-lti-api:
container_name: dev-lti-api
build:
context: .
dockerfile: Dockerfile.local
image: dev-lti-api:latest
working_dir: /lti-api
command: air -c .air.toml
ports:
- "8081:8081"
env_file:
- .env.lti-api
environment:
# override agar koneksi ke container internal
DB_HOST: dev-lti-postgres
DB_PORT: 5432
REDIS_URL: redis://dev-lti-redis:6379/0
volumes:
- .:/lti-api
- ./internal/config/jwtRS256.key:/run/keys/jwtRS256.key
- ./internal/config/jwtRS256.key.pub:/run/keys/jwtRS256.key.pub
depends_on:
- dev-lti-postgres
- dev-lti-redis
networks:
- lti-network
healthcheck:
test: ["CMD-SHELL", "wget -qO- http://localhost:8081/healthz || exit 1"]
interval: 10s
timeout: 3s
retries: 10
start_period: 10s
dev-lti-postgres:
image: postgres:15-alpine
container_name: dev-lti-postgres
restart: always
environment:
POSTGRES_USER: ${DB_USER:-postgres}
POSTGRES_PASSWORD: ${DB_PASSWORD:-postgres}
POSTGRES_DB: ${DB_NAME:-db_lti_erp}
ports:
- "5433:5432"
volumes:
- dev-lti-postgres-data:/var/lib/postgresql/data
networks:
- lti-network
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${DB_USER:-postgres} -d ${DB_NAME:-db_lti_erp}"]
interval: 10s
timeout: 5s
retries: 5
start_period: 5s
dev-lti-redis:
image: redis:7-alpine
container_name: dev-lti-redis
restart: always
ports:
- "6380:6379"
networks:
- lti-network
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 3s
retries: 10
networks:
lti-network:
driver: bridge
volumes:
dev-lti-postgres-data:
+201
View File
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2021 Micah Parks
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
File diff suppressed because one or more lines are too long
+69
View File
@@ -0,0 +1,69 @@
package keyfunc
import (
"crypto/ecdsa"
"crypto/elliptic"
"errors"
"fmt"
"math/big"
)
const (
// ktyEC is the key type (kty) in the JWT header for ECDSA.
ktyEC = "EC"
// p256 represents a 256-bit cryptographic elliptical curve type.
p256 = "P-256"
// p384 represents a 384-bit cryptographic elliptical curve type.
p384 = "P-384"
// p521 represents a 521-bit cryptographic elliptical curve type.
p521 = "P-521"
)
var (
// ErrECDSACurve indicates an error with the ECDSA curve.
ErrECDSACurve = errors.New("invalid ECDSA curve")
)
// ECDSA builds an ECDSA public key from the "crv", "x" and "y" values of the jsonWebKey.
//
// It returns an error wrapping ErrMissingAssets when any of the three values is empty, the decoding error when
// a coordinate is not valid Base64 URL data, and an error wrapping ErrECDSACurve when the curve name is not
// one of P-256, P-384 or P-521.
func (j *jsonWebKey) ECDSA() (publicKey *ecdsa.PublicKey, err error) {
	// All three members are required to describe an EC public key.
	for _, member := range [...]string{j.X, j.Y, j.Curve} {
		if member == "" {
			return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyEC)
		}
	}

	// The coordinates are Base64 URL encoded unsigned integers (RFC 7518, section 6.2.1).
	// Both are decoded before the curve is inspected, so a decoding failure is reported first.
	xBytes, err := base64urlTrailingPadding(j.X)
	if err != nil {
		return nil, err
	}
	yBytes, err := base64urlTrailingPadding(j.Y)
	if err != nil {
		return nil, err
	}

	// Map the curve name onto the standard library's curve implementation.
	var curve elliptic.Curve
	switch j.Curve {
	case p256:
		curve = elliptic.P256()
	case p384:
		curve = elliptic.P384()
	case p521:
		curve = elliptic.P521()
	default:
		return nil, fmt.Errorf("%w: unknown curve: %s", ErrECDSACurve, j.Curve)
	}

	// The decoded bytes are big-endian, which is what big.Int.SetBytes expects.
	return &ecdsa.PublicKey{
		Curve: curve,
		X:     new(big.Int).SetBytes(xBytes),
		Y:     new(big.Int).SetBytes(yBytes),
	}, nil
}
+29
View File
@@ -0,0 +1,29 @@
package keyfunc
import (
"crypto/ed25519"
"fmt"
)
const (
// ktyOKP is the key type (kty) in the JWT header for EdDSA.
ktyOKP = "OKP"
)
// EdDSA builds an EdDSA (Ed25519) public key from the "x" value of the jsonWebKey.
//
// It returns an error wrapping ErrMissingAssets when "x" is empty, and the decoding error when "x" is not valid
// Base64 URL data. The decoded bytes are returned as the key without any further checks.
func (j *jsonWebKey) EdDSA() (publicKey ed25519.PublicKey, err error) {
	if len(j.X) == 0 {
		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyOKP)
	}

	// According to RFC 8037, the public key is carried as Base64 URL encoded bytes.
	// https://datatracker.ietf.org/doc/html/rfc8037#appendix-A.2
	decoded, decodeErr := base64urlTrailingPadding(j.X)
	if decodeErr != nil {
		return nil, decodeErr
	}

	return ed25519.PublicKey(decoded), nil
}
+130
View File
@@ -0,0 +1,130 @@
{
"keys": [
{
"kid": "zXew0UJ1h6Q4CCcd_9wxMzvcp5cEBifH0KWrCz2Kyxc",
"kty": "RSA",
"alg": "PS256",
"use": "sig",
"n": "wqS81x6fItPUdh1OWCT8p3AuLYgFlpmg61WXp6sp1pVijoyF29GOSaD9xE-vLtegX-5h0BnP7va0bwsOAPdh6SdeVslEifNGHCtID0xNFqHNWcXSt4eLfQKAPFUq0TsEO-8P1QHRq6yeG8JAFaxakkaagLFuV8Vd_21PGJFWhvJodJLhX_-Ym9L8XUpIPps_mQriMUOWDe-5DWjHnDtfV7mgaOxbBvVo3wj8V2Lmo5Li4HabT4MEzeJ6e9IdFo2kj_44Yy9osX-PMPtu8BQz_onPgf0wjrVWt349Rj6OkS8RxlNGYeuIxYZr0TOhP5F-yEPhSXDsKdVTwPf7zAAaKQ",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4HR7HNDANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcwOTE5WhcNMzEwMzEwMTcxMDU5WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDCpLzXHp8i09R2HU5YJPyncC4tiAWWmaDrVZenqynWlWKOjIXb0Y5JoP3ET68u16Bf7mHQGc/u9rRvCw4A92HpJ15WyUSJ80YcK0gPTE0Woc1ZxdK3h4t9AoA8VSrROwQ77w/VAdGrrJ4bwkAVrFqSRpqAsW5XxV3/bU8YkVaG8mh0kuFf/5ib0vxdSkg+mz+ZCuIxQ5YN77kNaMecO19XuaBo7FsG9WjfCPxXYuajkuLgdptPgwTN4np70h0WjaSP/jhjL2ixf48w+27wFDP+ic+B/TCOtVa3fj1GPo6RLxHGU0Zh64jFhmvRM6E/kX7IQ+FJcOwp1VPA9/vMABopAgMBAAEwDQYJKoZIhvcNAQELBQADggEBALILq1Z4oQNJZEUt24VZcvknsWtQtvPxl3JNcBQgDR5/IMgl5VndRZ9OT56KUqrR5xRsWiCvh5Lgv4fUEzAAo9ToiPLub1SKP063zWrvfgi3YZ19bty0iXFm7l2cpQ3ejFV7WpcdLJE0lapFdPLo6QaRdgNu/1p4vbYg7zSK1fQ0OY5b3ajhAx/bhWlrN685owRbO5/r4rUOa6oo9l4Qn7jUxKUx4rcoe7zUM7qrpOPqKvn0DBp3n1/+9pOZXCjIfZGvYwP5NhzBDCkRzaXcJHlOqWzMBzyovVrzVmUilBcj+EsTYJs0gVXKzduX5zO6YWhFs23lu7AijdkxTY65YM0="
],
"x5t": "IYIeevIT57t8ppUejM42Bqx6f3I",
"x5t#S256": "TuOrBy2NcTlFSWuZ8Kh8W8AjQagb4fnfP1SlKMO8-So"
},
{
"kid": "ebJxnm9B3QDBljB5XJWEu72qx6BawDaMAhwz4aKPkQ0",
"kty": "EC",
"alg": "ES512",
"use": "sig",
"crv": "P-521",
"x": "YQ95Xj8MTzcHytbU1h8YkCN2kdEQA7ThuZ1ctB9Ekiw6tlM9RwL62eQvzEt4Rz8qN69uRqgU9RzxQOkSU5xVvyo",
"y": "SMMuP3QnAPHtx7Go2ARsG3NBaySWBLmVvS8s2Ss7Vm_ISWenNbdjKOsY1XvtiQz5scGzWDCEUoZzgV8Ve1mLOV0"
},
{
"kid": "TVAAet63O3xy_KK6_bxVIu7Ra3_z1wlB543Fbwi5VaU",
"kty": "EC",
"alg": "ES384",
"use": "sig",
"crv": "P-384",
"x": "Pik2o5as-evijFABH5p6YLXHnWw8iQ_N1ummPY1c_UgG6NO0za-gNOhTz2-tsd_w",
"y": "e98VSff71k19SY_mHgp3707lgQVrhfVpiGa-sGaKxOWVpxd2jWMhB0Q4RpSRuCp5"
},
{
"kid": "arlUxX4hh56rNO-XdIPhDT7bqBMqcBwNQuP_TnZJNGs",
"kty": "RSA",
"alg": "RS512",
"use": "sig",
"n": "hhtifu8LL3ICE3BAX5l1KZv6Lni0lhlhBusSfepnpxcb4C_z2U71cQTnLY27kt8WB4bNG6e5_KMx9K3xUdd3euj9MCq8vytwEPieeHE1KXQuhJfLv017lhpK_dRMOHyc-9-50YNdgs_8KWRkrzjjuYrCiO9Iu76n5319e-SC8OPvNUglqxp2N0Sp2ltne2ZrpN8T3OEEXT62TSGmLAVopRGw5gllNVrJfmEyZJCRrBM6s5CQcz8un0FjkAAC4DI6QD-eBL0qG3_NR0hQvR1he2o4BLwjOKH45Pk_jj-eArp-DD6Xq6ABQVb5SNOSdaxl5lnmuotRoY3G5d9YSl-K3w",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4HSCcDzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcxMTE5WhcNMzEwMzEwMTcxMjU5WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCGG2J+7wsvcgITcEBfmXUpm/oueLSWGWEG6xJ96menFxvgL/PZTvVxBOctjbuS3xYHhs0bp7n8ozH0rfFR13d66P0wKry/K3AQ+J54cTUpdC6El8u/TXuWGkr91Ew4fJz737nRg12Cz/wpZGSvOOO5isKI70i7vqfnfX175ILw4+81SCWrGnY3RKnaW2d7Zmuk3xPc4QRdPrZNIaYsBWilEbDmCWU1Wsl+YTJkkJGsEzqzkJBzPy6fQWOQAALgMjpAP54EvSobf81HSFC9HWF7ajgEvCM4ofjk+T+OP54Cun4MPperoAFBVvlI05J1rGXmWea6i1Ghjcbl31hKX4rfAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAB7bpwPoL02WGCCVhCsbDkq9GeFUwF01opVyFTijZlTUoTf5RcaR2qAH9/irkLjZeFeyozzC5mGvIVruBwnx/6l4PcAMxKK4YiheFVoO/dytpGMCj6ToNmKpjlXzOLAHelieWIUDtAFSYzENjIO01PyXTGYpxebpQCocJBvppj5HqARS9iNPcqBltMhxWrWmMu81tOG3Y7yd2xsIYXk6KjaoefLeN8Was4BPJ0zR6tTSEm6ZOvSRvlppqh84kz7LmWem7gGHAsY2G3tWBUmOdO/SMNMThqV62yLf7sKsuoE1w06lfmrf6D2zGwoEyz+TT6fdSkc34Yeh7+c01X6nFWU="
],
"x5t": "geiCPGtT_10T8xGLUK1LA0_YQEE",
"x5t#S256": "dLp3_QNGwMbYll5VecnR8Q9NSeFVfqJPBTa2_8qf48I"
},
{
"kid": "tW6ae7TomE6_2jooM-sf9N_6lWg7HNtaQXrDsElBzM4",
"kty": "RSA",
"alg": "PS512",
"use": "sig",
"n": "p32N7jqKfMUB6_dKY1uZ3wizzPlBAXg9XrntfUcwNLRPfTBnshpt4uQBf3T8fexkbzhtR18oHvim-YvcWfC5eLGQmWHYiVwACa_C7oGqx51ijK2LRbUg4TKhnZX2X3Ld9xvr3HsosKh2UXn_Ay8nuvdfH-U6S7btT6a-AIFlt3BpqZP0EOl7rY-ie8nXoA13xX6BoyzYiNcugdYCU6czQcmTIJ1JLS0zohi4aTNehRt-1VMRpIMx7q7Ouq3Zhbi7RcDo-_D8FPRhWc2eEKd-h8ebFTIxEOrkguBIomjEFTf3SfYbOB_h-14v9Q2yz-NzyId3-ujRCQGC0hn-cixe2w",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4BKAxqzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjMwMDEwWhcNMzEwMzA1MjMwMTUwWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCnfY3uOop8xQHr90pjW5nfCLPM+UEBeD1eue19RzA0tE99MGeyGm3i5AF/dPx97GRvOG1HXyge+Kb5i9xZ8Ll4sZCZYdiJXAAJr8LugarHnWKMrYtFtSDhMqGdlfZfct33G+vceyiwqHZRef8DLye6918f5TpLtu1Ppr4AgWW3cGmpk/QQ6Xutj6J7ydegDXfFfoGjLNiI1y6B1gJTpzNByZMgnUktLTOiGLhpM16FG37VUxGkgzHurs66rdmFuLtFwOj78PwU9GFZzZ4Qp36Hx5sVMjEQ6uSC4EiiaMQVN/dJ9hs4H+H7Xi/1DbLP43PIh3f66NEJAYLSGf5yLF7bAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAHVWNBTExqlg4LTcyhUXI5U0iNPcMIVdKDoGPDc3EPjXyYNyjURX0oZ6b1Wv5t+XGmpZRqJNYb92xraQatIzLEsRn4IrmzViP+dIyFU8BEDubixTxeqx7LSw2j6LIFnZ05XdmWknlksNTlqi4CT6KL+1c24+QU3CcmU3mkQEIPA2yC4SdAB1oXI0jh49uP6a+JrE7JREZGAdwbIpZ1cqV6acPiJW3tOYfLrHwo7KYn3KwJvIBHXgFBNwx7fl2gYNQ0VEGKub3qVwW5RO5R/6Tcla9uZEfEiamms/Pn4hFA1qbsNHtA9IRGVRSmVeBKDxRvo0fxOUXp+NuZxEnhsoP3I="
],
"x5t": "f1l1fxICz1fe9mI-sSrtc19EDhU",
"x5t#S256": "NUJWRA4ADpLEg_SMkSoE4FKQN0H1Tlz85L-i7puVcqQ"
},
{
"kid": "Lx1FmayP2YBtxaqS1SKJRJGiXRKnw2ov5WmYIMG-BLE",
"kty": "RSA",
"alg": "PS384",
"use": "sig",
"n": "q7WM4SnrdzlFSo_A1DRhc-8Ho-pBsfs49kGRbw3O_OKFIUyZrzHaRuovW_QaEAyiO3HX8CNcGPcpHdmpl4DhTGEBLcd6xXtCaa65ct00Mq7ZHCRRCrKLh6lJ0rY9fP8vCV0RBigpkNoRfrqLQQN4VeVFTbGSrDaS0LzPbap0-q5FKXUR-OQmQEtOupXhKFQtbB73tL83YnG6Swl7nXsx54ulEoDzcCCYt7pjCVVp7L9fzI2_ucTdtQclAJVQZGKpsx7vabOJuiMUwuAIz56lOJyXRMePsW8UogwC4FA2A52STsYlhOPsDEW4iIExFVNqs-CGoDGhYLIavaCkZhXM0w",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4HR+9XjANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcxMDIyWhcNMzEwMzEwMTcxMjAyWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCrtYzhKet3OUVKj8DUNGFz7wej6kGx+zj2QZFvDc784oUhTJmvMdpG6i9b9BoQDKI7cdfwI1wY9ykd2amXgOFMYQEtx3rFe0Jprrly3TQyrtkcJFEKsouHqUnStj18/y8JXREGKCmQ2hF+uotBA3hV5UVNsZKsNpLQvM9tqnT6rkUpdRH45CZAS066leEoVC1sHve0vzdicbpLCXudezHni6USgPNwIJi3umMJVWnsv1/Mjb+5xN21ByUAlVBkYqmzHu9ps4m6IxTC4AjPnqU4nJdEx4+xbxSiDALgUDYDnZJOxiWE4+wMRbiIgTEVU2qz4IagMaFgshq9oKRmFczTAgMBAAEwDQYJKoZIhvcNAQELBQADggEBADTgP3SrcG3p9XUB7sM4a2IeY0J4bSEtqlZBuHgdgekYJ5DXETJ3hV/82GjitU50NBup0IJyI9KZ0KCwqHIKC2Jn/6biOpM9Ipk4BtNVzx3qKNsDac9qZmyMpm4V9QuWakajknerhwyynG3siGUntbPmLvf5UKvKtbiKlWS4dBPwfedIUnC85mYEnNKSzSI1NiM6TWHB9zQYkARXlb89sh0HBYs08BfRMyBVM+l3OczIyGeQAfhcL+pxPP/0jqPr1ctHUBj2zXkjZxDw1oJFgeD9GDtPcjc3spB20vsRtQUBlzbJElbGflqWGHJK5l5n7gNd3ZXZT0HJ+wUpPE8EUaM="
],
"x5t": "fjRYR1986VCLzbaZaw5r25UKahw",
"x5t#S256": "ZHNHpizlsjD3qSZh7gJQQBu8W9jBL2HR0y7-3u2Wb-g"
},
{
"kid": "gnmAfvmlsi3kKH3VlM1AJ85P2hekQ8ON_XvJqs3xPD8",
"kty": "RSA",
"alg": "RS384",
"use": "sig",
"n": "qUNQewKl3APQcbpACMNJ2XphPpupt395z6OZvj5CW9tiRXY3J7dqi8U0bWoIhtmmc7Js6hjp-A5W_FVStuXlT1hLyjJsHeu9ZVPnfIl2MnYN83zQBKw8E4mFsVv0UXNvkVPBF_k0yXrz-ABleWLOgFGnkNU9csc3Z5aihHcwRmC_oS7PZ9Vc-l0xBCyF3YRHI-al8ppSHwFreOweF3-JP3poNAXd906_tjX2KlHSJmNqcUNiSfEluyCp02ALlRFKXUQ1HlfSupHcHySDlanfUyIzZgM9ysCvC1vfNdAuwZ44oUBMul_XPxxhzlewL2Y8PtSDLUDWGTIou8M8049D8Q",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4BJVfaDANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjI0ODIxWhcNMzEwMzA1MjI1MDAxWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCpQ1B7AqXcA9BxukAIw0nZemE+m6m3f3nPo5m+PkJb22JFdjcnt2qLxTRtagiG2aZzsmzqGOn4Dlb8VVK25eVPWEvKMmwd671lU+d8iXYydg3zfNAErDwTiYWxW/RRc2+RU8EX+TTJevP4AGV5Ys6AUaeQ1T1yxzdnlqKEdzBGYL+hLs9n1Vz6XTEELIXdhEcj5qXymlIfAWt47B4Xf4k/emg0Bd33Tr+2NfYqUdImY2pxQ2JJ8SW7IKnTYAuVEUpdRDUeV9K6kdwfJIOVqd9TIjNmAz3KwK8LW9810C7BnjihQEy6X9c/HGHOV7AvZjw+1IMtQNYZMii7wzzTj0PxAgMBAAEwDQYJKoZIhvcNAQELBQADggEBABoThxhMd7Xiq4x0GJeoJFv2yDKXCL3dJEAEWtOr2+PqdeJl/ZfOxBXynIvrdtYnQdICztN5ydEgDsZ02piDsxZ+s/0SA0iqjw/MEoBYobmr8V+xwUv+WtRLpTBXqWGMuG7NEtrbjKid0iKLLAOAU4dcHQ49iOF9VLnbTkf1EXp4iphJreaubOXMwT6/JDzQPT1dRR34hlhYeKKzMSA0Cz5aYL1tI+eH12rar0MDczXykLChNS/8MlyTzreEf0siUiS9S1kj/lOZKQDg9E/z8fm5vmHEHzAVwf4ON5iO29tDsqLw7BeJqC4AESjliXIqMrdpFynfPnIsGgf3dnph5BM="
],
"x5t": "CmRnQVduZWtEsdOC4mauUUsSWxA",
"x5t#S256": "BvC0LmuM8ZIApN3TQQZWWbGO-d082Ah5d3D6vPvahGw"
},
{
"kid": "CGt0ZWS4Lc5faiKSdi0tU0fjCAdvGROQRGU9iR7tV0A",
"kty": "EC",
"alg": "ES256",
"use": "sig",
"crv": "P-256",
"x": "DPW7n9yjfE6Rt-VvVmEdeu4QdW44qifocAPPDxACDDY",
"y": "-ejsVw8222-hg2dJWx3QV0hE4-I0Ujp7ZsWebE68JE0"
},
{
"kid": "C65q0EKQyhpd1m4fr7SKO2He_nAxgCtAdws64d2BLt8",
"kty": "RSA",
"alg": "RS256",
"use": "sig",
"n": "ja99ybDrLvw11Z4CvNlDI-kkqJEBpSnvDf0pZF2DvBlvYmeVYL_ChqIe8E9GyHUmLMdtO_jifSgOqE5b8vILwi1kZnJR7N857uEnbWM9YTeevi_RZ-E_hr4frW2NKJ78YGvCzwLKG2GgtSjj0zuTLnSaK8fCGzqXgy6paXNhgHUSZgGwvO0YItpMlyJeqEj1wGTWz1IyA1sguF1cC7K0fojPbPoBwrhvaAeoGRPLraE0rrBsQv8iiLwnRBIez9B1j0NiUG8Iad953Y7UzaKOAw8crIEK45NIK_yxHUpxqcHLjPIcRyIyJGioRyGK7cp-_7iPLOCutQc-u46mom1_ZQ",
"e": "AQAB",
"x5c": [
"MIICmzCCAYMCBgF4BJRpbzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjI0NzE4WhcNMzEwMzA1MjI0ODU4WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCNr33JsOsu/DXVngK82UMj6SSokQGlKe8N/SlkXYO8GW9iZ5Vgv8KGoh7wT0bIdSYsx207+OJ9KA6oTlvy8gvCLWRmclHs3znu4SdtYz1hN56+L9Fn4T+Gvh+tbY0onvxga8LPAsobYaC1KOPTO5MudJorx8IbOpeDLqlpc2GAdRJmAbC87Rgi2kyXIl6oSPXAZNbPUjIDWyC4XVwLsrR+iM9s+gHCuG9oB6gZE8utoTSusGxC/yKIvCdEEh7P0HWPQ2JQbwhp33ndjtTNoo4DDxysgQrjk0gr/LEdSnGpwcuM8hxHIjIkaKhHIYrtyn7/uI8s4K61Bz67jqaibX9lAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAHrGJFhVNiQupIwkn2jiW/jBobm9CHUxOwQL5E7WdRz5uaOJ0v62PrynOQE9xim9Qk8bT3q7DThZs66U9bpIk3msKVRgXRfn5FZy1H5RKOlEEFZhGakPqSlC1yPbhUNhHXMs3GTzdGMLtYaGvSy6XM/8/zqVqVwgh6BpbAR9RfiSdyaiNTSBriu+n/tHW934G9J8UIzdfpVcb0Yt9y4o0UgIXt64NtGFq7zmNJijH88AxBZFB6eUUmQQCczebzoAjyYbVOes5gGFzboVWcyLe3iyD0vvsAVHJViXeiGoxhpKnc8ryISpRUBzsKngf5uZo3bnrD9PHLYBoGOHgzII1xw="
],
"x5t": "5GNr3LeRXHWI4YR8-QTSsF98oTI",
"x5t#S256": "Dgd0_wZZqvRuf4GEISPNHREX-1ixTMIsrPeGzk0bCxs"
},
{
"kty": "OKP",
"d": "TJ0UPkOZDPfneEDSH2ETbLQWjrALD-BPZQR-E7mgPvY",
"use": "sig",
"crv": "Ed25519",
"kid": "Q56A",
"x": "iZli54E2SkbrOvAThwrnxn1AMIOaazi_ckl6B-hbDK8"
},
{
"kty": "oct",
"use": "sig",
"kid": "hmac",
"k": "V_8Ob8dVs6JuZx6expyjShoUgFgxoaovGjmGhesL2jA"
},
{
"e": "AQAB",
"use": "enc",
"kid": "kidWithBadUse",
"kty": "RSA",
"n": "znO8fsURSvghcjbMu2nysqZhsreTkj-y46YL39kctmlj7-qqVLuvTUtw0XvsxwLi9WWczz_BsAm2Rn6LzyhvXUXjj6uMP8tk-HhWc4RMXP-esqB7y6WUmR8SioT94SykuVhWMDxwkg7kXTg_GWEYibEFJ7YM16vVZ2Na5z2vRfMRy7VARXRhDrinJmW0B-oY9FurPTyaZSDqOr-3Qkhk1jm9-6ygFsOkmnd4Ljnq28t8hq_4k3bdZSolZv11boQS8vDO-Fo_2YoQVxm4YMIjcr8bxZcali2slOEytEC5ItOKTPA_CydM62sJubw7MuTrOKh6GJrq0xnw6MtqR46-MQ"
}
]
}
+247
View File
@@ -0,0 +1,247 @@
package keyfunc
import (
"bytes"
"context"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
var (
// ErrRefreshImpossible is returned when a refresh is attempted on a JWKS that was not created from a remote
// resource.
ErrRefreshImpossible = errors.New("refresh impossible: JWKS was not created from a remote resource")
// defaultRefreshTimeout is the default duration for the context used to create the HTTP request for a refresh of
// the JWKS.
defaultRefreshTimeout = time.Minute
)
// Get loads the JWKS at the given URL.
//
// The options are applied first and any unset client, request factory, response extractor, refresh timeout, or
// "use" whitelist is replaced with its default. The remote resource is then fetched once, synchronously.
//
// If that first fetch fails, the error is returned unless options.TolerateInitialJWKHTTPError is set, in which case the
// error is passed to the RefreshErrorHandler (if any) and the JWKS starts out holding only the given keys.
//
// If RefreshInterval is non-zero or RefreshUnknownKID is true, a background goroutine is launched. End it with
// JWKS.EndBackground.
func Get(jwksURL string, options Options) (jwks *JWKS, err error) {
	jwks = &JWKS{
		jwksURL: jwksURL,
	}

	applyOptions(jwks, options)

	// Fill in defaults for anything the caller left unset.
	if jwks.client == nil {
		jwks.client = http.DefaultClient
	}
	if jwks.requestFactory == nil {
		jwks.requestFactory = defaultRequestFactory
	}
	if jwks.responseExtractor == nil {
		jwks.responseExtractor = ResponseExtractorStatusOK
	}
	if jwks.refreshTimeout == 0 {
		jwks.refreshTimeout = defaultRefreshTimeout
	}
	if !options.JWKUseNoWhitelist && len(jwks.jwkUseWhitelist) == 0 {
		jwks.jwkUseWhitelist = map[JWKUse]struct{}{
			UseOmitted:   {},
			UseSignature: {},
		}
	}

	err = jwks.refresh()
	if err != nil {
		if !options.TolerateInitialJWKHTTPError {
			// The JWKS is discarded, so release any context that was derived from options.Ctx. Without this the
			// derived context stays registered with its parent until the parent itself is canceled.
			if jwks.cancel != nil {
				jwks.cancel()
			}
			return nil, err
		}
		if jwks.refreshErrorHandler != nil {
			jwks.refreshErrorHandler(err)
		}

		// Behave as if a valid, empty JWK Set had been received: a successful refresh of an empty set would still
		// contain the given keys, so seed them here too.
		jwks.keys = make(map[string]parsedJWK, len(jwks.givenKeys))
		for kid, key := range jwks.givenKeys {
			jwks.keys[kid] = parsedJWK{algorithm: key.algorithm, public: key.inter}
		}
	}

	if jwks.refreshInterval != 0 || jwks.refreshUnknownKID {
		if jwks.ctx == nil {
			jwks.ctx = context.Background()
		}
		// Only derive a cancelable context when there is not one already; wrapping twice would overwrite (and lose)
		// the first cancel function.
		if jwks.cancel == nil {
			jwks.ctx, jwks.cancel = context.WithCancel(jwks.ctx)
		}
		// A buffer of one lets a single refresh request be queued while another is being served.
		jwks.refreshRequests = make(chan refreshRequest, 1)
		go jwks.backgroundRefresh()
	}

	return jwks, nil
}
// Refresh manually refreshes the JWKS with the remote resource. It can bypass the rate limit if configured to do so.
// This function will return an ErrRefreshImpossible if the JWKS was created from a static source like given keys or raw
// JSON, because there is no remote resource to refresh from.
//
// When a background goroutine exists, the request is handed to it and this function blocks until the goroutine
// acknowledges the request, the given ctx ends, or the background goroutine's own context ends. Otherwise the refresh
// is performed synchronously in the calling goroutine.
//
// NOTE(review): when a rate limit is active and IgnoreRateLimit is false, the background goroutine appears to
// acknowledge the request as soon as the refresh is queued, so the keys may not be updated yet on return.
func (j *JWKS) Refresh(ctx context.Context, options RefreshOptions) error {
	if j.jwksURL == "" {
		return ErrRefreshImpossible
	}

	// No background goroutine was launched: refresh directly.
	if j.refreshInterval == 0 && !j.refreshUnknownKID {
		if err := j.refresh(); err != nil {
			return fmt.Errorf("failed to refresh JWKS: %w", err)
		}
		return nil
	}

	ctx, cancel := context.WithCancel(ctx)
	// Release the derived context on every path. The background goroutine may call cancel again later, which is
	// harmless.
	defer cancel()

	req := refreshRequest{
		cancel:          cancel,
		ignoreRateLimit: options.IgnoreRateLimit,
	}

	select {
	case <-ctx.Done():
		// Report the error of the context that actually ended; j.ctx may still be alive (nil error) here.
		return fmt.Errorf("failed to send request refresh to background goroutine: %w", ctx.Err())
	case <-j.ctx.Done():
		// The background goroutine has ended, nobody will ever receive the request.
		return fmt.Errorf("failed to send request refresh to background goroutine: %w", j.ctx.Err())
	case j.refreshRequests <- req:
	}

	// Wait for the background goroutine to acknowledge the request by calling req.cancel.
	select {
	case <-ctx.Done():
	case <-j.ctx.Done():
		// Both channels may be ready at once; only fail if the request was not acknowledged.
		if ctx.Err() == nil {
			return fmt.Errorf("keyfunc background goroutine ended before the refresh finished: %w", j.ctx.Err())
		}
	}

	if !errors.Is(ctx.Err(), context.Canceled) {
		return fmt.Errorf("unexpected keyfunc background refresh context error: %w", ctx.Err())
	}

	return nil
}
// backgroundRefresh is meant to be a separate goroutine that will update the keys in a JWKS over a given interval of
// time.
//
// It serves the requests arriving on j.refreshRequests (sent by the interval timer below, by Refresh, and by getKey)
// and returns once j.ctx is done. When j.refreshRateLimit is non-zero, a request that arrives too soon after the
// previous refresh does not refresh immediately; instead at most one delayed refresh is scheduled at a time.
func (j *JWKS) backgroundRefresh() {
var lastRefresh time.Time
// queueOnce ensures that only one delayed (rate limited) refresh is pending at any time.
var queueOnce sync.Once
// refreshMux serializes refreshes and guards lastRefresh and queueOnce, which are shared with the delayed refresh
// goroutine launched below.
var refreshMux sync.Mutex
// Backdate lastRefresh by one rate limit period so that the first request is not rate limited.
if j.refreshRateLimit != 0 {
lastRefresh = time.Now().Add(-j.refreshRateLimit)
}
// Create a channel that will never send anything unless there is a refresh interval.
refreshInterval := make(<-chan time.Time)
// refresh performs one refresh, reports a failure to the error handler (if any), and records the time. Callers
// hold refreshMux.
refresh := func() {
err := j.refresh()
if err != nil && j.refreshErrorHandler != nil {
j.refreshErrorHandler(err)
}
lastRefresh = time.Now()
}
// Enter an infinite loop that ends when the background ends.
for {
// The timer is re-armed on every pass through the loop, so serving any request restarts the interval countdown.
if j.refreshInterval != 0 {
refreshInterval = time.After(j.refreshInterval)
}
select {
case <-refreshInterval:
// The interval elapsed: enqueue a request for this same goroutine to pick up on the next pass.
select {
case <-j.ctx.Done():
return
case j.refreshRequests <- refreshRequest{}:
default: // If the j.refreshRequests channel is full, don't send another request.
}
case req := <-j.refreshRequests:
refreshMux.Lock()
if req.ignoreRateLimit {
refresh()
} else if j.refreshRateLimit != 0 && lastRefresh.Add(j.refreshRateLimit).After(time.Now()) {
// Launch a goroutine that will get a reservation for a JWKS refresh or fail to and immediately return.
queueOnce.Do(func() {
go func() {
// Compute how long to wait until the rate limit allows the next refresh.
refreshMux.Lock()
wait := time.Until(lastRefresh.Add(j.refreshRateLimit))
refreshMux.Unlock()
select {
case <-j.ctx.Done():
return
case <-time.After(wait):
}
refreshMux.Lock()
defer refreshMux.Unlock()
refresh()
// Reset the Once so a later rate limited request can schedule another delayed refresh.
queueOnce = sync.Once{}
}()
})
} else {
refresh()
}
// Release the requester. Note that this also happens when the refresh was only scheduled by the rate limiter
// above and has not run yet.
if req.cancel != nil {
req.cancel()
}
refreshMux.Unlock()
// Clean up this goroutine when its context expires.
case <-j.ctx.Done():
return
}
}
}
func defaultRequestFactory(ctx context.Context, url string) (*http.Request, error) {
return http.NewRequestWithContext(ctx, http.MethodGet, url, bytes.NewReader(nil))
}
// refresh does an HTTP GET on the JWKS URL to rebuild the JWKS.
//
// The request is created by j.requestFactory, sent with j.client, and its response is turned into raw JSON by
// j.responseExtractor, all bounded by j.refreshTimeout. If the raw JSON is non-empty and identical to the previously
// stored copy, nothing is changed. Otherwise the keys are replaced by the newly parsed set and the given keys are
// merged back in. The stored raw JSON and the keys are only updated together, after parsing succeeded.
func (j *JWKS) refresh() (err error) {
	parent := j.ctx
	if parent == nil {
		parent = context.Background()
	}
	ctx, cancel := context.WithTimeout(parent, j.refreshTimeout)
	defer cancel()

	req, err := j.requestFactory(ctx, j.jwksURL)
	if err != nil {
		return fmt.Errorf("failed to create request via factory function: %w", err)
	}

	resp, err := j.client.Do(req)
	if err != nil {
		return err
	}

	// The response extractor is responsible for closing the response body.
	jwksBytes, err := j.responseExtractor(ctx, resp)
	if err != nil {
		return fmt.Errorf("failed to extract response via extractor function: %w", err)
	}

	// Only reprocess if the JWKS has changed. j.raw is read by RawJWKS under j.mux, so access it under the lock too.
	j.mux.RLock()
	unchanged := len(jwksBytes) != 0 && bytes.Equal(jwksBytes, j.raw)
	j.mux.RUnlock()
	if unchanged {
		return nil
	}

	updated, err := NewJSON(jwksBytes)
	if err != nil {
		// Keep the previous raw JSON and keys so they stay consistent with each other.
		return err
	}

	j.mux.Lock()
	defer j.mux.Unlock()

	j.raw = jwksBytes
	j.keys = updated.keys

	for kid, key := range j.givenKeys {
		// Only overwrite the key if configured to do so.
		if !j.givenKIDOverride {
			if _, ok := j.keys[kid]; ok {
				continue
			}
		}
		// Carry the given key's algorithm along, as NewGiven does, so getKey can still compare it with the token's
		// "alg" header.
		j.keys[kid] = parsedJWK{algorithm: key.algorithm, public: key.inter}
	}

	return nil
}
+115
View File
@@ -0,0 +1,115 @@
package keyfunc
import (
"crypto/ecdsa"
"crypto/ed25519"
"crypto/rsa"
"encoding/json"
)
// GivenKey represents a cryptographic key that resides in a JWKS. It is used in conjunction with Options.GivenKeys.
type GivenKey struct {
algorithm string
inter interface{}
}
// GivenKeyOptions represents the configuration options for a GivenKey.
type GivenKeyOptions struct {
// Algorithm is the given key's signing algorithm. Its value will be compared to unverified tokens' "alg" header.
//
// See RFC 8725 Section 3.1 for details.
// https://www.rfc-editor.org/rfc/rfc8725#section-3.1
//
// For a list of possible values, please see:
// https://www.rfc-editor.org/rfc/rfc7518#section-3.1
// https://www.iana.org/assignments/jose/jose.xhtml#web-signature-encryption-algorithms
Algorithm string
}
// NewGiven builds a JWKS that contains exactly the given keys, indexed by their key ID (`kid`). Each key keeps the
// algorithm it was created with. The resulting JWKS has no remote resource.
func NewGiven(givenKeys map[string]GivenKey) (jwks *JWKS) {
	parsed := make(map[string]parsedJWK)
	for id := range givenKeys {
		entry := givenKeys[id]
		parsed[id] = parsedJWK{algorithm: entry.algorithm, public: entry.inter}
	}
	jwks = &JWKS{keys: parsed}
	return jwks
}
// NewGivenCustom wraps an untyped key in a GivenKey. The key is expected to be of a type supported by the jwt
// package used.
//
// Consider the options carefully as each field may have a security implication.
//
// See the https://pkg.go.dev/github.com/golang-jwt/jwt/v5#RegisterSigningMethod function for registering an unsupported
// signing method.
func NewGivenCustom(key interface{}, options GivenKeyOptions) (givenKey GivenKey) {
	givenKey.algorithm = options.Algorithm
	givenKey.inter = key
	return givenKey
}
// NewGivenECDSA wraps an ECDSA public key in a GivenKey.
//
// Consider the options carefully as each field may have a security implication.
func NewGivenECDSA(key *ecdsa.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
	givenKey.algorithm = options.Algorithm
	givenKey.inter = key
	return givenKey
}
// NewGivenEdDSA wraps an EdDSA (Ed25519) public key in a GivenKey.
//
// Consider the options carefully as each field may have a security implication.
func NewGivenEdDSA(key ed25519.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
	givenKey.algorithm = options.Algorithm
	givenKey.inter = key
	return givenKey
}
// NewGivenHMAC wraps an HMAC key, provided as a byte slice, in a GivenKey.
//
// Consider the options carefully as each field may have a security implication.
func NewGivenHMAC(key []byte, options GivenKeyOptions) (givenKey GivenKey) {
	givenKey.algorithm = options.Algorithm
	givenKey.inter = key
	return givenKey
}
// NewGivenRSA wraps an RSA public key in a GivenKey.
//
// Consider the options carefully as each field may have a security implication.
func NewGivenRSA(key *rsa.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
	givenKey.algorithm = options.Algorithm
	givenKey.inter = key
	return givenKey
}
// NewGivenKeysFromJSON converts a raw JSON JWK Set into a map of key IDs (`kid`) to GivenKeys, keeping each key's
// algorithm. The result can be handed to `NewGiven()` or used as `Options.GivenKeys` for `Get()`.
//
// An error is returned only when the JSON cannot be parsed.
func NewGivenKeysFromJSON(jwksBytes json.RawMessage) (map[string]GivenKey, error) {
	// A throwaway JWKS does the parsing. It never leaves this function, so its map needs no locking.
	parsed, parseErr := NewJSON(jwksBytes)
	if parseErr != nil {
		return nil, parseErr
	}

	given := make(map[string]GivenKey, len(parsed.keys))
	for id := range parsed.keys {
		source := parsed.keys[id]
		given[id] = GivenKey{algorithm: source.algorithm, inter: source.public}
	}
	return given, nil
}
+239
View File
@@ -0,0 +1,239 @@
package keyfunc
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
var (
// ErrJWKAlgMismatch indicates that the given JWK was found, but its "alg" parameter's value did not match that of
// the JWT.
ErrJWKAlgMismatch = errors.New(`the given JWK was found, but its "alg" parameter's value did not match the expected algorithm`)
// ErrJWKUseWhitelist indicates that the given JWK was found, but its "use" parameter's value was not whitelisted.
ErrJWKUseWhitelist = errors.New(`the given JWK was found, but its "use" parameter's value was not whitelisted`)
// ErrKIDNotFound indicates that the given key ID was not found in the JWKS.
ErrKIDNotFound = errors.New("the given key ID was not found in the JWKS")
// ErrMissingAssets indicates that assets required to create a public key are missing.
ErrMissingAssets = errors.New("required assets are missing to create a public key")
)
// ErrorHandler is a function signature that consumes an error.
type ErrorHandler func(err error)
const (
// UseEncryption is a JWK "use" parameter value indicating the JSON Web Key is to be used for encryption.
UseEncryption JWKUse = "enc"
// UseOmitted is a JWK "use" parameter value that was not specified or was empty.
UseOmitted JWKUse = ""
// UseSignature is a JWK "use" parameter value indicating the JSON Web Key is to be used for signatures.
UseSignature JWKUse = "sig"
)
// JWKUse is a set of values for the "use" parameter of a JWK.
// See https://tools.ietf.org/html/rfc7517#section-4.2.
type JWKUse string
// jsonWebKey represents a JSON Web Key inside a JWKS.
type jsonWebKey struct {
Algorithm string `json:"alg"`
Curve string `json:"crv"`
Exponent string `json:"e"`
K string `json:"k"`
ID string `json:"kid"`
Modulus string `json:"n"`
Type string `json:"kty"`
Use string `json:"use"`
X string `json:"x"`
Y string `json:"y"`
}
// parsedJWK represents a JSON Web Key parsed with fields as the correct Go types.
type parsedJWK struct {
algorithm string
public interface{}
use JWKUse
}
// JWKS represents a JSON Web Key Set (JWK Set).
//
// A JWKS is either static (created from given keys or raw JSON) or backed by a remote resource (created by Get), in
// which case it may own a background goroutine that refreshes the keys.
type JWKS struct {
// jwkUseWhitelist is the set of JWK "use" values getKey will return keys for. When empty, the check is skipped.
jwkUseWhitelist map[JWKUse]struct{}
// cancel cancels ctx. It is called by EndBackground.
cancel context.CancelFunc
// client is the HTTP client used to request the remote JWKS.
client *http.Client
// ctx bounds the background refresh goroutine and is the parent of each refresh request's timeout context.
ctx context.Context
// raw is the raw JWKS most recently received from jwksURL. RawJWKS returns a copy of it.
raw []byte
// givenKeys are merged into keys after every successful refresh.
givenKeys map[string]GivenKey
// givenKIDOverride makes a given key win over a remote key with the same key ID.
givenKIDOverride bool
// jwksURL is the location of the remote JWKS. It is empty for a JWKS without a remote resource.
jwksURL string
// keys maps key IDs (`kid`) to parsed keys. Access is guarded by mux.
keys map[string]parsedJWK
mux sync.RWMutex
// refreshErrorHandler, if set, receives errors from refreshes.
refreshErrorHandler ErrorHandler
// refreshInterval is the period of the automatic background refresh. Zero disables it.
refreshInterval time.Duration
// refreshRateLimit is the minimum duration between rate limited refreshes. Zero disables rate limiting.
refreshRateLimit time.Duration
// refreshRequests carries refresh requests to the background goroutine.
refreshRequests chan refreshRequest
// refreshTimeout is the timeout applied to each refresh.
refreshTimeout time.Duration
// refreshUnknownKID makes getKey request a refresh when a key ID is not found.
refreshUnknownKID bool
// requestFactory creates the HTTP request for a refresh.
requestFactory func(ctx context.Context, url string) (*http.Request, error)
// responseExtractor turns the HTTP response of a refresh into raw JWKS JSON.
responseExtractor func(ctx context.Context, resp *http.Response) (json.RawMessage, error)
}
// rawJWKS represents a JWKS in JSON format.
type rawJWKS struct {
Keys []*jsonWebKey `json:"keys"`
}
// NewJSON creates a new JWKS from a raw JSON message.
//
// An error is returned only when the JSON itself cannot be unmarshaled. Individual entries of the "keys" array are
// skipped silently when they are null, have an unknown "kty", or cannot be turned into a key. Keys are indexed by
// their "kid"; a later key with the same "kid" (including an empty one) replaces an earlier key.
func NewJSON(jwksBytes json.RawMessage) (jwks *JWKS, err error) {
	var rawKS rawJWKS
	err = json.Unmarshal(jwksBytes, &rawKS)
	if err != nil {
		return nil, err
	}

	// Iterate through the keys in the raw JWKS. Add them to the JWKS.
	jwks = &JWKS{
		keys: make(map[string]parsedJWK, len(rawKS.Keys)),
	}
	for _, key := range rawKS.Keys {
		// A JSON null inside the "keys" array unmarshals to a nil pointer. The JWKS usually comes from a remote
		// server, so skip such an entry instead of dereferencing it.
		if key == nil {
			continue
		}

		var keyInter interface{}
		switch key.Type {
		case ktyEC:
			keyInter, err = key.ECDSA()
		case ktyOKP:
			keyInter, err = key.EdDSA()
		case ktyOct:
			keyInter, err = key.Oct()
		case ktyRSA:
			keyInter, err = key.RSA()
		default:
			// Ignore unknown key types silently.
			continue
		}
		if err != nil {
			// Ignore keys that cannot be parsed; the remaining keys are still usable.
			continue
		}

		jwks.keys[key.ID] = parsedJWK{
			algorithm: key.Algorithm,
			use:       JWKUse(key.Use),
			public:    keyInter,
		}
	}

	return jwks, nil
}
// EndBackground stops the background goroutine that refreshes the JWKS keys by canceling its context. It does nothing
// when the JWKS has no cancel function, and only the first call has an effect.
func (j *JWKS) EndBackground() {
	if stop := j.cancel; stop != nil {
		stop()
	}
}
// KIDs lists the key ID (`kid`) of every key currently in the JWKS. The order of the result is unspecified.
func (j *JWKS) KIDs() (kids []string) {
	j.mux.RLock()
	defer j.mux.RUnlock()

	kids = make([]string, 0, len(j.keys))
	for id := range j.keys {
		kids = append(kids, id)
	}
	return kids
}
// Len reports how many keys the JWKS currently holds.
func (j *JWKS) Len() int {
	j.mux.RLock()
	count := len(j.keys)
	j.mux.RUnlock()
	return count
}
// RawJWKS returns a copy of the raw JWKS bytes stored from the remote resource. The copy may be modified freely by
// the caller.
func (j *JWKS) RawJWKS() []byte {
	j.mux.RLock()
	defer j.mux.RUnlock()

	duplicate := make([]byte, len(j.raw))
	copy(duplicate, j.raw)
	return duplicate
}
// ReadOnlyKeys returns a read-only copy of the mapping of key IDs (`kid`) to cryptographic keys.
//
// The returned map is a new map, but the keys inside it are shared with the JWKS and must not be modified.
func (j *JWKS) ReadOnlyKeys() map[string]interface{} {
	// Copying only reads j.keys, so a read lock is sufficient and does not block other readers such as getKey.
	j.mux.RLock()
	defer j.mux.RUnlock()

	keys := make(map[string]interface{}, len(j.keys))
	for kid, cryptoKey := range j.keys {
		keys[kid] = cryptoKey.public
	}
	return keys
}
// getKey returns the cryptographic key stored under the given key ID (`kid`) after checking it against the "use"
// whitelist and the token's algorithm.
//
// When the key ID is unknown and refreshUnknownKID is set, one refresh is requested from the background goroutine and
// the lookup is retried after the request was acknowledged. If a refresh request is already queued, or the background
// goroutine has ended, the lookup fails with ErrKIDNotFound.
//
// Errors wrap ErrKIDNotFound, ErrJWKUseWhitelist, or ErrJWKAlgMismatch. A nil key is never returned with a nil error.
func (j *JWKS) getKey(alg, kid string) (jsonKey interface{}, err error) {
	j.mux.RLock()
	pubKey, ok := j.keys[kid]
	j.mux.RUnlock()

	if !ok {
		if !j.refreshUnknownKID {
			return nil, ErrKIDNotFound
		}

		ctx, cancel := context.WithCancel(j.ctx)
		// Release the derived context on every path, including the early returns below. The background goroutine
		// may call cancel again, which is harmless.
		defer cancel()

		req := refreshRequest{
			cancel: cancel,
		}

		// Refresh the JWKS.
		select {
		case <-j.ctx.Done():
			// The background goroutine has ended, so the key cannot be refreshed. Return an error rather than a nil
			// key with a nil error.
			return nil, fmt.Errorf("%w: JWKS background refresh has ended: %v", ErrKIDNotFound, j.ctx.Err())
		case j.refreshRequests <- req:
		default:
			// If the j.refreshRequests channel is full, return the error early.
			return nil, ErrKIDNotFound
		}

		// Wait for the JWKS refresh to finish. ctx is derived from j.ctx, so this also ends when the background ends.
		<-ctx.Done()

		j.mux.RLock()
		pubKey, ok = j.keys[kid]
		j.mux.RUnlock()
		if !ok {
			return nil, ErrKIDNotFound
		}
	}

	// jwkUseWhitelist might be empty if the jwks was from keyfunc.NewJSON() or if JWKUseNoWhitelist option was true.
	if len(j.jwkUseWhitelist) > 0 {
		if _, ok = j.jwkUseWhitelist[pubKey.use]; !ok {
			return nil, fmt.Errorf(`%w: JWK "use" parameter value %q is not whitelisted`, ErrJWKUseWhitelist, pubKey.use)
		}
	}

	// A key without an algorithm is accepted for any token algorithm.
	if pubKey.algorithm != "" && pubKey.algorithm != alg {
		return nil, fmt.Errorf(`%w: JWK "alg" parameter value %q does not match token "alg" parameter value %q`, ErrJWKAlgMismatch, pubKey.algorithm, alg)
	}

	return pubKey.public, nil
}
+59
View File
@@ -0,0 +1,59 @@
package keyfunc
import (
"encoding/base64"
"errors"
"fmt"
"strings"
"github.com/golang-jwt/jwt/v5"
)
var (
// ErrKID indicates that the JWT had an invalid kid.
ErrKID = errors.New("the JWT has an invalid kid")
)
// Keyfunc has the signature of jwt.Keyfunc from github.com/golang-jwt/jwt/v5. It reads the key ID and algorithm from
// the token's header and returns the matching key from this JWKS.
func (j *JWKS) Keyfunc(token *jwt.Token) (interface{}, error) {
	keyID, algorithm, headerErr := kidAlg(token)
	if headerErr == nil {
		return j.getKey(algorithm, keyID)
	}
	return nil, headerErr
}
// Keyfunc has the signature of jwt.Keyfunc from github.com/golang-jwt/jwt/v5. It delegates the choice of key to the
// key selector this MultipleJWKS was configured with.
func (m *MultipleJWKS) Keyfunc(token *jwt.Token) (interface{}, error) {
	selected, err := m.keySelector(m, token)
	return selected, err
}
// kidAlg extracts the "kid" and "alg" values from the token's header. Both must be present as strings. A missing or
// non-string "kid" yields an error wrapping ErrKID; a missing or non-string "alg" yields an error wrapping
// ErrJWKAlgMismatch.
func kidAlg(token *jwt.Token) (kid, alg string, err error) {
	header := token.Header

	rawKID, present := header["kid"]
	if !present {
		return "", "", fmt.Errorf("%w: could not find kid in JWT header", ErrKID)
	}

	var isString bool
	if kid, isString = rawKID.(string); !isString {
		return "", "", fmt.Errorf("%w: could not convert kid in JWT header to string", ErrKID)
	}

	if alg, isString = header["alg"].(string); !isString {
		// For test coverage purposes, this should be impossible to reach because the JWT package rejects a token
		// without an alg parameter in the header before calling jwt.Keyfunc.
		return "", "", fmt.Errorf(`%w: the JWT header did not contain the "alg" parameter, which is required by RFC 7515 section 4.1.1`, ErrJWKAlgMismatch)
	}

	return kid, alg, nil
}
// base64urlTrailingPadding decodes a base64url string after stripping any trailing "=" padding. Some JWKS that do not
// comply with the RFCs contain padding at the end of base64url encoded values.
//
// RFC 7517 defines base64url the same way as RFC 7515 Section 2, which requires trailing padding to be omitted:
// https://datatracker.ietf.org/doc/html/rfc7517#section-1.1
// https://datatracker.ietf.org/doc/html/rfc7515#section-2
func base64urlTrailingPadding(s string) ([]byte, error) {
	return base64.RawURLEncoding.DecodeString(strings.TrimRight(s, "="))
}
+72
View File
@@ -0,0 +1,72 @@
package keyfunc
import (
"errors"
"fmt"
"github.com/golang-jwt/jwt/v5"
)
// ErrMultipleJWKSSize is returned when the number of JWKS given are not enough to make a MultipleJWKS.
var ErrMultipleJWKSSize = errors.New("multiple JWKS must have one or more remote JWK Set resources")
// MultipleJWKS manages multiple JWKS and has a field for jwt.Keyfunc.
type MultipleJWKS struct {
keySelector func(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error)
sets map[string]*JWKS // No lock is required because this map is read-only after initialization.
}
// GetMultiple creates a new MultipleJWKS. A map of length one or more JWKS URLs to Options is required.
//
// Be careful when choosing Options for each JWKS in the map. If RefreshUnknownKID is set to true for all JWKS in the
// map then many refresh requests would take place each time a JWT is processed, this should be rate limited by
// RefreshRateLimit.
//
// Each JWKS is loaded with Get. If any of them fails, the JWKS that were already created are shut down with
// EndBackground and the error is returned. If options.KeySelector is nil, KeySelectorFirst is used.
func GetMultiple(multiple map[string]Options, options MultipleOptions) (multiJWKS *MultipleJWKS, err error) {
	if len(multiple) < 1 {
		return nil, fmt.Errorf("multiple JWKS must have one or more remote JWK Set resources: %w", ErrMultipleJWKSSize)
	}

	if options.KeySelector == nil {
		options.KeySelector = KeySelectorFirst
	}

	multiJWKS = &MultipleJWKS{
		sets:        make(map[string]*JWKS, len(multiple)),
		keySelector: options.KeySelector,
	}

	for u, opts := range multiple {
		jwks, err := Get(u, opts)
		if err != nil {
			// The caller never receives the partially built MultipleJWKS, so stop the background goroutines of the
			// JWKS created so far; nobody else could end them.
			for _, started := range multiJWKS.sets {
				started.EndBackground()
			}
			return nil, fmt.Errorf("failed to get JWKS from %q: %w", u, err)
		}
		multiJWKS.sets[u] = jwks
	}

	return multiJWKS, nil
}
// JWKSets returns a new map from JWKS URL to JWKS. Only the map is copied; the JWKS values are shared with the
// MultipleJWKS and should be treated as read-only.
func (m *MultipleJWKS) JWKSets() map[string]*JWKS {
	duplicate := make(map[string]*JWKS, len(m.sets))
	for location := range m.sets {
		duplicate[location] = m.sets[location]
	}
	return duplicate
}
// KeySelectorFirst asks each JWK Set in turn for the key matching the token's key ID and algorithm and returns the
// first one found. The sets are stored in a map, so the order in which they are tried is unspecified. If no set
// yields a key, an error wrapping ErrKIDNotFound is returned.
func KeySelectorFirst(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error) {
	keyID, algorithm, headerErr := kidAlg(token)
	if headerErr != nil {
		return nil, headerErr
	}

	for _, set := range multiJWKS.sets {
		if found, lookupErr := set.getKey(algorithm, keyID); lookupErr == nil {
			return found, nil
		}
	}

	return nil, fmt.Errorf("failed to find key ID in multiple JWKS: %w", ErrKIDNotFound)
}
+28
View File
@@ -0,0 +1,28 @@
package keyfunc
import (
"fmt"
)
const (
// ktyOct is the key type (kty) in the JWT header for oct.
ktyOct = "oct"
)
// Oct turns a jsonWebKey of type "oct" into its raw bytes (octets). This includes HMAC keys.
//
// An error wrapping ErrMissingAssets is returned when the "k" parameter is empty, and the decoding error is returned
// when "k" is not valid base64url.
func (j *jsonWebKey) Oct() (publicKey []byte, err error) {
	encoded := j.K
	if len(encoded) == 0 {
		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyOct)
	}

	// RFC 7517 specifies base64url encoding for the key value.
	// https://datatracker.ietf.org/doc/html/rfc7517#section-1.1
	decoded, decodeErr := base64urlTrailingPadding(encoded)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
+165
View File
@@ -0,0 +1,165 @@
package keyfunc
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"time"
"github.com/golang-jwt/jwt/v5"
)
// ErrInvalidHTTPStatusCode indicates that the HTTP status code is invalid.
var ErrInvalidHTTPStatusCode = errors.New("invalid HTTP status code")
// Options represents the configuration options for a JWKS.
//
// If either RefreshInterval is non-zero or RefreshUnknownKID is true, then a background goroutine will be launched to refresh the
// remote JWKS under the specified circumstances.
//
// When using a background refresh goroutine, make sure to use RefreshRateLimit if paired with RefreshUnknownKID. Also
// make sure to end the background refresh goroutine with the JWKS.EndBackground method when it's no longer needed.
type Options struct {
// Client is the HTTP client used to get the JWKS via HTTP.
Client *http.Client
// Ctx is the context for the keyfunc's background refresh. When the context expires or is canceled, the background
// goroutine will end.
Ctx context.Context
// GivenKeys is a map of JWT key IDs, `kid`, to their given keys. If the JWKS has a background refresh goroutine,
// these values persist across JWKS refreshes. By default, if the remote JWKS resource contains a key with the same
// `kid` any given keys with the same `kid` will be overwritten by the keys from the remote JWKS. Use the
// GivenKIDOverride option to flip this behavior.
GivenKeys map[string]GivenKey
// GivenKIDOverride will make a GivenKey override any keys with the same ID (`kid`) in the remote JWKS. This is only
// effectual if GivenKeys is provided.
GivenKIDOverride bool
// JWKUseWhitelist is a whitelist of JWK `use` parameter values that will restrict what keys can be returned for
// jwt.Keyfunc. The assumption is that jwt.Keyfunc is only used for JWT signature verification.
// The default behavior is to only return a JWK if its `use` parameter has the value `"sig"`, an empty string, or if
// the parameter was omitted entirely.
JWKUseWhitelist []JWKUse
// JWKUseNoWhitelist overrides the JWKUseWhitelist field and its default behavior. If set to true, all JWKs will be
// returned regardless of their `use` parameter value.
JWKUseNoWhitelist bool
// RefreshErrorHandler is a function that consumes errors that happen during a JWKS refresh. This is only effectual
// if a background refresh goroutine is active.
RefreshErrorHandler ErrorHandler
// RefreshInterval is the duration to refresh the JWKS in the background via a new HTTP request. If this is not zero,
// then a background goroutine will be used to refresh the JWKS once per the given interval. Make sure to call the
// JWKS.EndBackground method to end this goroutine when it's no longer needed.
RefreshInterval time.Duration
// RefreshRateLimit limits the rate at which refresh requests are granted. Only one refresh request can be queued
// at a time; any refresh requests received while one is already queued are ignored. It does not make sense to
// have RefreshInterval's value shorter than this.
RefreshRateLimit time.Duration
// RefreshTimeout is the duration for the context timeout used to create the HTTP request for a refresh of the JWKS.
// This defaults to one minute. This is used for the HTTP request and any background goroutine refreshes.
RefreshTimeout time.Duration
// RefreshUnknownKID indicates that the JWKS refresh request will occur every time a kid that isn't cached is seen.
// This is done through a background goroutine. Without specifying a RefreshRateLimit, a malicious client could
// self-sign X JWTs, send them to this service, then cause potentially high network usage proportional to X. Make
// sure to call the JWKS.EndBackground method to end this goroutine when it's no longer needed.
//
// It is recommended this option is not used when in MultipleJWKS. This is because KID collisions SHOULD be uncommon
// meaning nearly any JWT SHOULD trigger a refresh for the number of JWKS in the MultipleJWKS minus one.
RefreshUnknownKID bool
// RequestFactory creates HTTP requests for the remote JWKS resource located at the given url. For example, an
// HTTP header could be added to indicate a User-Agent.
RequestFactory func(ctx context.Context, url string) (*http.Request, error)
// ResponseExtractor consumes a *http.Response and produces the raw JSON for the JWKS. By default, the
// ResponseExtractorStatusOK function is used. The default behavior changed in v1.4.0.
ResponseExtractor func(ctx context.Context, resp *http.Response) (json.RawMessage, error)
// TolerateInitialJWKHTTPError will tolerate any error from the initial HTTP JWKS request. If an error occurs,
// the RefreshErrorHandler will be given the error. The program will continue to run as if the error did not occur
// and a valid JWK Set with no keys was received in the response. This allows for the background goroutine to
// request the JWKS at a later time.
//
// It does not make sense to mark this field as true unless the background refresh goroutine is active.
TolerateInitialJWKHTTPError bool
}
// MultipleOptions is used to configure the behavior when multiple JWKS are used by MultipleJWKS.
type MultipleOptions struct {
// KeySelector is a function that selects the key to use for a given token. It will be used in the implementation
// for jwt.Keyfunc. If implementing this custom selector extract the key ID and algorithm from the token's header.
// Use the key ID to select a token and confirm the key's algorithm before returning it.
//
// This value defaults to KeySelectorFirst.
KeySelector func(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error)
}
// RefreshOptions are used to specify manual refresh behavior.
type RefreshOptions struct {
IgnoreRateLimit bool
}
type refreshRequest struct {
cancel context.CancelFunc
ignoreRateLimit bool
}
// ResponseExtractorStatusOK is intended for the ResponseExtractor field of Options. It returns the raw response body
// when the status code is 200 OK and an error wrapping ErrInvalidHTTPStatusCode otherwise. The response body is
// closed in either case.
func ResponseExtractorStatusOK(ctx context.Context, resp *http.Response) (json.RawMessage, error) {
	body := resp.Body
	//goland:noinspection GoUnhandledErrorResult
	defer body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("%w: %d", ErrInvalidHTTPStatusCode, code)
	}

	return io.ReadAll(body)
}
// ResponseExtractorStatusAny is meant to be used as the ResponseExtractor field for Options. It returns the raw JSON
// from the response body regardless of the response status code.
func ResponseExtractorStatusAny(ctx context.Context, resp *http.Response) (json.RawMessage, error) {
//goland:noinspection GoUnhandledErrorResult
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
// applyOptions copies the given options into the JWKS. Plain values are assigned as they are. A non-nil Ctx is
// wrapped in a cancelable context, GivenKeys is copied into a new map, and, unless JWKUseNoWhitelist is set,
// JWKUseWhitelist is converted into a set. No defaults are applied here.
func applyOptions(jwks *JWKS, options Options) {
	jwks.client = options.Client
	jwks.givenKIDOverride = options.GivenKIDOverride
	jwks.refreshErrorHandler = options.RefreshErrorHandler
	jwks.refreshInterval = options.RefreshInterval
	jwks.refreshRateLimit = options.RefreshRateLimit
	jwks.refreshTimeout = options.RefreshTimeout
	jwks.refreshUnknownKID = options.RefreshUnknownKID
	jwks.requestFactory = options.RequestFactory
	jwks.responseExtractor = options.ResponseExtractor

	if parent := options.Ctx; parent != nil {
		jwks.ctx, jwks.cancel = context.WithCancel(parent)
	}

	if options.GivenKeys != nil {
		// Copy the map so later changes by the caller do not affect the JWKS.
		copied := make(map[string]GivenKey)
		for kid, key := range options.GivenKeys {
			copied[kid] = key
		}
		jwks.givenKeys = copied
	}

	if options.JWKUseNoWhitelist {
		return
	}
	whitelist := make(map[JWKUse]struct{})
	for _, use := range options.JWKUseWhitelist {
		whitelist[use] = struct{}{}
	}
	jwks.jwkUseWhitelist = whitelist
}
+43
View File
@@ -0,0 +1,43 @@
package keyfunc
import (
"crypto/rsa"
"fmt"
"math/big"
)
const (
// ktyRSA is the key type (kty) in the JWT header for RSA.
ktyRSA = "RSA"
)
// RSA parses a jsonWebKey and turns it into an RSA public key.
//
// An error wrapping ErrMissingAssets is returned when the exponent ("e") or modulus ("n") is empty. An error is also
// returned when either value is not valid base64url, or when the exponent is zero or too large to be stored in the
// int used by rsa.PublicKey.
func (j *jsonWebKey) RSA() (publicKey *rsa.PublicKey, err error) {
	if j.Exponent == "" || j.Modulus == "" {
		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyRSA)
	}

	// Decode the exponent from Base64.
	//
	// According to RFC 7518, this is a Base64 URL unsigned integer.
	// https://tools.ietf.org/html/rfc7518#section-6.3
	exponent, err := base64urlTrailingPadding(j.Exponent)
	if err != nil {
		return nil, err
	}
	modulus, err := base64urlTrailingPadding(j.Modulus)
	if err != nil {
		return nil, err
	}

	// Turn the exponent into an integer.
	//
	// According to RFC 7517, these numbers are in big-endian format.
	// https://tools.ietf.org/html/rfc7517#appendix-A.1
	e := new(big.Int).SetBytes(exponent)

	// Converting an oversized exponent would silently truncate it and produce a different key than the one that was
	// published, so reject it. The int round trip also covers platforms where int is 32 bits wide.
	if e.Sign() == 0 || !e.IsInt64() || int64(int(e.Int64())) != e.Int64() {
		return nil, fmt.Errorf("%s: public exponent is zero or does not fit in an int", ktyRSA)
	}

	publicKey = &rsa.PublicKey{
		E: int(e.Int64()),
		N: new(big.Int).SetBytes(modulus),
	}

	return publicKey, nil
}
+19
View File
@@ -0,0 +1,19 @@
Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+14
View File
@@ -0,0 +1,14 @@
This package is a brotli compressor and decompressor implemented in Go.
It was translated from the reference implementation (https://github.com/google/brotli)
with the `c2go` tool at https://github.com/andybalholm/c2go.
I have been working on new compression algorithms (not translated from C)
in the matchfinder package.
You can use them with the NewWriterV2 function.
Currently they give better results than the old implementation
(at least for compressing my test file, Newton's *Opticks*)
on levels 2 to 6.
I am using it in production with https://github.com/andybalholm/redwood.
API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc.
+185
View File
@@ -0,0 +1,185 @@
package brotli
import (
"sync"
)
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
// computeDistanceCode maps a backward distance to a distance code.
//
// For a distance within max_distance, a short code is returned when the distance equals one of the four entries of
// dist_cache, or lies close enough to one of the first two entries that a 4-bit code can be looked up from a packed
// constant. The checks are made in a fixed order of priority. Every other distance is encoded as
// distance + numDistanceShortCodes - 1.
//
// dist_cache must hold at least two entries, and four when none of the first checks match.
func computeDistanceCode(distance uint, max_distance uint, dist_cache []int) uint {
	if distance > max_distance {
		return distance + numDistanceShortCodes - 1
	}

	// Offsets of the distance relative to the two most recent cache entries, biased by 3. The subtraction is
	// unsigned, so a distance far below the cache entry wraps to a large value and fails the "< 7" checks.
	var biased uint = distance + 3
	var near_last uint = biased - uint(dist_cache[0])
	var near_second uint = biased - uint(dist_cache[1])

	switch {
	case distance == uint(dist_cache[0]):
		return 0
	case distance == uint(dist_cache[1]):
		return 1
	case near_last < 7:
		// Select the 4-bit code for this offset from the packed table.
		return (0x9750468 >> (4 * near_last)) & 0xF
	case near_second < 7:
		return (0xFDB1ACE >> (4 * near_second)) & 0xF
	case distance == uint(dist_cache[2]):
		return 2
	case distance == uint(dist_cache[3]):
		return 3
	}

	return distance + numDistanceShortCodes - 1
}
var hasherSearchResultPool sync.Pool
/* createBackwardReferences walks the ringbuffer positions
[position, position+num_bytes), asks hasher.FindLongestMatch for a match at
each position and appends one command to *commands for every accepted match.

dist_cache is updated in place whenever an emitted command uses a non-zero
distance code for a distance within max_distance+gap. *num_literals is
increased by the insert length of every emitted command. *last_insert_len
supplies the pending insert length on entry and receives the insert length
left over after the last command on return. */
func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
/* Set maximum distance, see section 9.1. of the spec. */
var max_backward_limit uint = maxBackwardLimit(params.lgwin)
var insert_length uint = *last_insert_len
var pos_end uint = position + num_bytes
/* Upper bound for the ranges handed to hasher.StoreRange below. */
var store_end uint
if num_bytes >= hasher.StoreLookahead() {
store_end = position + num_bytes - hasher.StoreLookahead() + 1
} else {
store_end = position
}
/* For speed up heuristics for random data. */
var random_heuristics_window_size uint = literalSpreeLengthForSparseSearch(params)
/* Position after which the "no match for a long time" skipping below kicks in;
it is pushed forward every time a match is emitted. */
var apply_random_heuristics uint = position + random_heuristics_window_size
var gap uint = 0
/* Minimum score to accept a backward reference. */
const kMinScore uint = scoreBase + 100
hasher.PrepareDistanceCache(dist_cache)
/* Take two scratch results from the pool, allocating when the pool is empty
(the failed type assertion on a nil Get result leaves the pointer nil). */
sr2, _ := hasherSearchResultPool.Get().(*hasherSearchResult)
if sr2 == nil {
sr2 = &hasherSearchResult{}
}
sr, _ := hasherSearchResultPool.Get().(*hasherSearchResult)
if sr == nil {
sr = &hasherSearchResult{}
}
for position+hasher.HashTypeLength() < pos_end {
var max_length uint = pos_end - position
var max_distance uint = brotli_min_size_t(position, max_backward_limit)
sr.len = 0
sr.len_code_delta = 0
sr.distance = 0
sr.score = kMinScore
hasher.FindLongestMatch(&params.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position, max_length, max_distance, gap, params.dist.max_distance, sr)
if sr.score > kMinScore {
/* Found a match. Let's look for something even better ahead. */
var delayed_backward_references_in_row int = 0
max_length--
for ; ; max_length-- {
/* Score margin by which the match at position+1 must beat the current one. */
var cost_diff_lazy uint = 175
if params.quality < minQualityForExtensiveReferenceSearch {
sr2.len = brotli_min_size_t(sr.len-1, max_length)
} else {
sr2.len = 0
}
sr2.len_code_delta = 0
sr2.distance = 0
sr2.score = kMinScore
max_distance = brotli_min_size_t(position+1, max_backward_limit)
hasher.FindLongestMatch(&params.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position+1, max_length, max_distance, gap, params.dist.max_distance, sr2)
if sr2.score >= sr.score+cost_diff_lazy {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
position++
insert_length++
*sr = *sr2
delayed_backward_references_in_row++
/* Delay at most 4 times in a row, and only while another lookup still fits. */
if delayed_backward_references_in_row < 4 && position+hasher.HashTypeLength() < pos_end {
continue
}
}
break
}
apply_random_heuristics = position + 2*sr.len + random_heuristics_window_size
max_distance = brotli_min_size_t(position, max_backward_limit)
{
/* The first 16 codes are special short-codes,
and the minimum offset is 1. */
var distance_code uint = computeDistanceCode(sr.distance, max_distance+gap, dist_cache)
/* Distance code 0 repeats the last distance, so the cache is only rotated
for non-zero codes whose distance is within max_distance+gap. */
if (sr.distance <= (max_distance + gap)) && distance_code > 0 {
dist_cache[3] = dist_cache[2]
dist_cache[2] = dist_cache[1]
dist_cache[1] = dist_cache[0]
dist_cache[0] = int(sr.distance)
hasher.PrepareDistanceCache(dist_cache)
}
*commands = append(*commands, makeCommand(&params.dist, insert_length, sr.len, sr.len_code_delta, distance_code))
}
*num_literals += insert_length
insert_length = 0
/* Put the hash keys into the table, if there are enough bytes left.
Depending on the hasher implementation, it can push all positions
in the given range or only a subset of them.
Avoid hash poisoning with RLE data. */
{
var range_start uint = position + 2
var range_end uint = brotli_min_size_t(position+sr.len, store_end)
if sr.distance < sr.len>>2 {
range_start = brotli_min_size_t(range_end, brotli_max_size_t(range_start, position+sr.len-(sr.distance<<2)))
}
hasher.StoreRange(ringbuffer, ringbuffer_mask, range_start, range_end)
}
position += sr.len
} else {
insert_length++
position++
/* If we have not seen matches for a long time, we can skip some
match lookups. Unsuccessful match lookups are very very expensive
and this kind of a heuristic speeds up compression quite
a lot. */
if position > apply_random_heuristics {
/* Going through uncompressible data, jump. */
if position > apply_random_heuristics+4*random_heuristics_window_size {
var kMargin uint = brotli_max_size_t(hasher.StoreLookahead()-1, 4)
/* It is quite a long time since we saw a copy, so we assume
that this data is not compressible, and store hashes less
often. Hashes of non compressible data are less likely to
turn out to be useful in the future, too, so we store less of
them to not to flood out the hash table of good compressible
data. */
var pos_jump uint = brotli_min_size_t(position+16, pos_end-kMargin)
for ; position < pos_jump; position += 4 {
hasher.Store(ringbuffer, ringbuffer_mask, position)
insert_length += 4
}
} else {
var kMargin uint = brotli_max_size_t(hasher.StoreLookahead()-1, 2)
var pos_jump uint = brotli_min_size_t(position+8, pos_end-kMargin)
for ; position < pos_jump; position += 2 {
hasher.Store(ringbuffer, ringbuffer_mask, position)
insert_length += 2
}
}
}
}
}
/* Whatever was not covered by a command becomes pending insert length. */
insert_length += pos_end - position
*last_insert_len = insert_length
hasherSearchResultPool.Put(sr)
hasherSearchResultPool.Put(sr2)
}
+796
View File
@@ -0,0 +1,796 @@
package brotli
import "math"
/* zopfliNode is one entry of the per-position node array used by the
shortest-path search in this file. */
type zopfliNode struct {
/* Low 25 bits: copy length. High 7 bits: a modifier from which the length
code is recovered as copy length + 9 - modifier. */
length uint32
/* Copy distance, as stored by updateZopfliNode. */
distance uint32
/* Low 27 bits: insert length. High 5 bits: distance short code + 1, or 0
when no short code is used. */
dcode_insert_length uint32
u struct {
/* Cost of reaching this node; kInfinity after initZopfliNodes. */
cost float32
/* Written by computeShortestPathFromNodes: length of the command that
starts here on the chosen path, or math.MaxUint32 at the end of the path. */
next uint32
/* Written by evaluateNode with the result of computeDistanceShortcut. */
shortcut uint32
}
}
/* Upper bound applied to zopfliCostModel.distance_histogram_size, and the size
of the distance histogram in zopfliCostModelSetFromCommands. */
const maxEffectiveDistanceAlphabetSize = 544
/* Cost assigned to nodes that have not been reached. */
const kInfinity float32 = 1.7e38 /* ~= 2 ^ 127 */
/* For distance short code j, updateNodes tries the candidate distance
distance_cache[kDistanceCacheIndex[j]] + kDistanceCacheOffset[j]. */
var kDistanceCacheIndex = []uint32{0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}
var kDistanceCacheOffset = []int{0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3}
/* initZopfliNodes overwrites array[0..length) with the "unreached" node:
   length 1, distance 0, no insert length and a cost of kInfinity. */
func initZopfliNodes(array []zopfliNode, length uint) {
	unreached := zopfliNode{length: 1}
	unreached.u.cost = kInfinity
	for idx := uint(0); idx < length; idx++ {
		array[idx] = unreached
	}
}
/* zopfliNodeCopyLength returns the copy length held in the low 25 bits of
   self.length. */
func zopfliNodeCopyLength(self *zopfliNode) uint32 {
	const copy_length_mask = 1<<25 - 1
	return self.length & copy_length_mask
}
/* zopfliNodeLengthCode rebuilds the length code from the copy length and the
   modifier stored in the bits of self.length above bit 24. */
func zopfliNodeLengthCode(self *zopfliNode) uint32 {
	return zopfliNodeCopyLength(self) + 9 - (self.length >> 25)
}
/* zopfliNodeCopyDistance returns the copy distance stored in the node. */
func zopfliNodeCopyDistance(self *zopfliNode) uint32 {
return self.distance
}
/* zopfliNodeDistanceCode returns the distance code of the node. The top five
   bits of dcode_insert_length hold "short code + 1"; when they are zero the
   code is derived from the copy distance instead. */
func zopfliNodeDistanceCode(self *zopfliNode) uint32 {
	if code_plus_one := self.dcode_insert_length >> 27; code_plus_one != 0 {
		return code_plus_one - 1
	}
	return zopfliNodeCopyDistance(self) + numDistanceShortCodes - 1
}
/* zopfliNodeCommandLength returns copy length plus insert length, i.e. the
   number of bytes covered by the command that ends at this node. */
func zopfliNodeCommandLength(self *zopfliNode) uint32 {
	insert_length := self.dcode_insert_length & 0x7FFFFFF
	return zopfliNodeCopyLength(self) + insert_length
}
/* Histogram based cost model for zopflification. */
type zopfliCostModel struct {
/* Cost per command symbol. */
cost_cmd_ [numCommandSymbols]float32
/* Cost per distance symbol; allocated with dist.alphabet_size entries. */
cost_dist_ []float32
/* Number of leading cost_dist_ entries that the Set* functions fill in. */
distance_histogram_size uint32
/* Running totals: literal_costs_[i] is the accumulated literal cost of the
first i bytes, so a range cost is a difference of two entries. */
literal_costs_ []float32
/* Smallest value in cost_cmd_. */
min_cost_cmd_ float32
/* Number of input bytes the model was initialised for. */
num_bytes_ uint
}
/* initZopfliCostModel allocates the cost tables for a block of num_bytes
   bytes. literal_costs_ gets num_bytes + 2 entries, cost_dist_ gets
   dist.alphabet_size entries, and distance_histogram_size is
   dist.alphabet_size capped at maxEffectiveDistanceAlphabetSize. */
func initZopfliCostModel(self *zopfliCostModel, dist *distanceParams, num_bytes uint) {
	self.num_bytes_ = num_bytes
	self.literal_costs_ = make([]float32, num_bytes+2)
	self.cost_dist_ = make([]float32, dist.alphabet_size)

	self.distance_histogram_size = dist.alphabet_size
	if self.distance_histogram_size > maxEffectiveDistanceAlphabetSize {
		self.distance_histogram_size = maxEffectiveDistanceAlphabetSize
	}
}
/* cleanupZopfliCostModel drops the references to the slices allocated by
   initZopfliCostModel. */
func cleanupZopfliCostModel(self *zopfliCostModel) {
	self.literal_costs_, self.cost_dist_ = nil, nil
}
/* setCost converts the first histogram_size counts of histogram into per-symbol
   bit costs written to cost.

   A symbol with a non-zero count costs log2(total) - log2(count), but never
   less than 1 bit. A symbol with a zero count costs log2(m) + 2, where m is
   the total count and, unless literal_histogram is set, additionally the
   number of zero-count symbols. */
func setCost(histogram []uint32, histogram_size uint, literal_histogram bool, cost []float32) {
	var total uint
	var absent uint
	for sym := uint(0); sym < histogram_size; sym++ {
		total += uint(histogram[sym])
		if histogram[sym] == 0 {
			absent++
		}
	}

	log2_total := float32(fastLog2(total))
	missing_total := total
	if !literal_histogram {
		missing_total += absent
	}
	missing_cost := float32(fastLog2(missing_total)) + 2

	for sym := uint(0); sym < histogram_size; sym++ {
		count := histogram[sym]
		if count == 0 {
			cost[sym] = missing_cost
			continue
		}

		/* Shannon bits for this symbol. */
		bits := log2_total - float32(fastLog2(uint(count)))

		/* Cannot be coded with less than 1 bit */
		if bits < 1 {
			bits = 1
		}
		cost[sym] = bits
	}
}
/* zopfliCostModelSetFromCommands derives the cost model from a list of commands.

   It builds literal, command and distance histograms by replaying commands
   over the ring buffer, starting last_insert_len bytes before position, and
   turns them into costs with setCost. It then records the cheapest command
   cost and fills literal_costs_ with running totals of the per-byte literal
   cost for the num_bytes_ bytes starting at position. */
func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint, commands []command, last_insert_len uint) {
	var (
		literal_counts  [numLiteralSymbols]uint32
		command_counts  [numCommandSymbols]uint32
		distance_counts [maxEffectiveDistanceAlphabetSize]uint32
		literal_cost    [numLiteralSymbols]float32
	)

	cursor := position - last_insert_len
	for idx := range commands {
		cmd := &commands[idx]
		insert_len := uint(cmd.insert_len_)
		copy_len := uint(commandCopyLen(cmd))
		dist_code := uint(cmd.dist_prefix_) & 0x3FF
		cmd_code := uint(cmd.cmd_prefix_)

		command_counts[cmd_code]++
		/* Only command codes from 128 upwards are counted in the distance histogram. */
		if cmd_code >= 128 {
			distance_counts[dist_code]++
		}
		for off := uint(0); off < insert_len; off++ {
			literal_counts[ringbuffer[(cursor+off)&ringbuffer_mask]]++
		}
		cursor += insert_len + copy_len
	}

	setCost(literal_counts[:], numLiteralSymbols, true, literal_cost[:])
	setCost(command_counts[:], numCommandSymbols, false, self.cost_cmd_[:])
	setCost(distance_counts[:], uint(self.distance_histogram_size), false, self.cost_dist_)

	cheapest := kInfinity
	for _, c := range self.cost_cmd_[:] {
		cheapest = brotli_min_float(cheapest, c)
	}
	self.min_cost_cmd_ = cheapest

	/* Running totals with a carry term that feeds the float32 rounding error
	   of each addition back into the next one. */
	totals := self.literal_costs_
	var carry float32 = 0.0
	byte_count := int(self.num_bytes_)
	totals[0] = 0.0
	for idx := 0; idx < byte_count; idx++ {
		carry += literal_cost[ringbuffer[(position+uint(idx))&ringbuffer_mask]]
		totals[idx+1] = totals[idx] + carry
		carry -= totals[idx+1] - totals[idx]
	}
}
/* zopfliCostModelSetFromLiteralCosts initialises the model without command
   statistics. Literal costs come from estimateBitCostsForLiterals and are
   turned into running totals in place. Command symbol i costs log2(11 + i),
   distance symbol i costs log2(20 + i), and the minimum command cost is
   log2(11). */
func zopfliCostModelSetFromLiteralCosts(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	totals := self.literal_costs_
	dist_costs := self.cost_dist_
	byte_count := self.num_bytes_

	/* Per-byte estimates land in totals[1:], leaving totals[0] for the empty prefix. */
	estimateBitCostsForLiterals(position, byte_count, ringbuffer_mask, ringbuffer, totals[1:])
	totals[0] = 0.0

	/* Running totals with a carry term that feeds the float32 rounding error
	   of each addition back into the next one. */
	var carry float32 = 0.0
	for idx := uint(0); idx < byte_count; idx++ {
		carry += totals[idx+1]
		totals[idx+1] = totals[idx] + carry
		carry -= totals[idx+1] - totals[idx]
	}

	for sym := uint(0); sym < numCommandSymbols; sym++ {
		self.cost_cmd_[sym] = float32(fastLog2(uint(11 + uint32(sym))))
	}
	for sym := uint(0); uint32(sym) < self.distance_histogram_size; sym++ {
		dist_costs[sym] = float32(fastLog2(uint(20 + uint32(sym))))
	}
	self.min_cost_cmd_ = float32(fastLog2(11))
}
/* zopfliCostModelGetCommandCost returns the modelled cost of command symbol
cmdcode. */
func zopfliCostModelGetCommandCost(self *zopfliCostModel, cmdcode uint16) float32 {
return self.cost_cmd_[cmdcode]
}
/* zopfliCostModelGetDistanceCost returns the modelled cost of distance symbol
distcode. */
func zopfliCostModelGetDistanceCost(self *zopfliCostModel, distcode uint) float32 {
return self.cost_dist_[distcode]
}
/* zopfliCostModelGetLiteralCosts returns the literal cost of the bytes in
   [from, to) as the difference of two running totals. */
func zopfliCostModelGetLiteralCosts(self *zopfliCostModel, from uint, to uint) float32 {
	totals := self.literal_costs_
	return totals[to] - totals[from]
}
/* zopfliCostModelGetMinCostCmd returns the smallest command cost recorded by
the last zopfliCostModelSetFrom* call. */
func zopfliCostModelGetMinCostCmd(self *zopfliCostModel) float32 {
return self.min_cost_cmd_
}
/* updateZopfliNode records in nodes[pos+len] a command that starts at
   start_pos, inserts pos - start_pos literals and then copies len bytes from
   distance dist, at total cost `cost`.

   length packs len in the low bits and len + 9 - len_code from bit 25 up;
   dcode_insert_length packs the insert length in the low bits and short_code
   from bit 27 up.

   REQUIRES: len >= 2, start_pos <= pos
   REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
   Maintains the "ZopfliNode array invariant". */
func updateZopfliNode(nodes []zopfliNode, pos uint, start_pos uint, len uint, len_code uint, dist uint, short_code uint, cost float32) {
	length_modifier := len + 9 - len_code
	insert_length := pos - start_pos

	target := &nodes[pos+len]
	target.length = uint32(len | length_modifier<<25)
	target.distance = uint32(dist)
	target.dcode_insert_length = uint32(short_code<<27 | insert_length)
	target.u.cost = cost
}
/* posData describes one candidate command start position kept in a
startPosQueue. */
type posData struct {
/* Position of the candidate, as passed to evaluateNode. */
pos uint
/* Distance cache computed for this position by computeDistanceCache. */
distance_cache [4]int
/* cost minus the literal cost of the bytes in [0, pos); the queue is ordered by this value. */
costdiff float32
/* Cost of the node at pos. */
cost float32
}
/* Maintains the 8 smallest cost differences together with their positions. */
type startPosQueue struct {
/* Ring of at most 8 entries, addressed modulo 8 relative to idx_. */
q_ [8]posData
/* Number of pushes so far; it is not capped at 8. */
idx_ uint
}
/* initStartPosQueue empties the queue by resetting its push counter; the
stored entries themselves are left untouched. */
func initStartPosQueue(self *startPosQueue) {
self.idx_ = 0
}
/* startPosQueueSize returns the number of valid entries: the number of pushes
   so far, capped at the 8 slots of the queue. */
func startPosQueueSize(self *startPosQueue) uint {
	const slot_count = 8
	return brotli_min_size_t(self.idx_, slot_count)
}
/* startPosQueuePush inserts *posdata into the queue and restores ascending
   costdiff order. The new entry is written to the slot just before the
   current head and then moved towards the back by adjacent swaps. */
func startPosQueuePush(self *startPosQueue, posdata *posData) {
	slot := ^self.idx_ & 7
	self.idx_++
	count := startPosQueueSize(self)
	self.q_[slot] = *posdata

	/* Restore the sorted order. In the list of |count| items at most
	   |count - 1| adjacent element comparisons / swaps are required. */
	for step := uint(1); step < count; step++ {
		here, there := slot&7, (slot+1)&7
		if self.q_[here].costdiff > self.q_[there].costdiff {
			self.q_[here], self.q_[there] = self.q_[there], self.q_[here]
		}
		slot++
	}
}
/* startPosQueueAt returns a pointer to the k-th entry counted from the head of
   the queue (k = 0 is the entry with the smallest costdiff). */
func startPosQueueAt(self *startPosQueue, k uint) *posData {
	slot := (k - self.idx_) & 7
	return &self.q_[slot]
}
/* Returns the minimum possible copy length that can improve the cost of any
   future position.

   start_cost is the minimum possible cost of reaching anything from pos. The
   candidate length grows while nodes[pos+length] is already reached at no
   more than that cost. */
func computeMinimumCopyLength(start_cost float32, nodes []zopfliNode, num_bytes uint, pos uint) uint {
	min_cost := start_cost
	copy_len := uint(2)
	bucket_limit := uint(10)
	bucket_width := uint(4)

	for pos+copy_len <= num_bytes && nodes[pos+copy_len].u.cost <= min_cost {
		/* We already reached (pos + copy_len) with no more cost than the
		   minimum possible cost of reaching anything from this pos, so there
		   is no point in looking for lengths <= copy_len. */
		copy_len++
		if copy_len != bucket_limit {
			continue
		}

		/* We reached the next copy length code bucket, so we add one more
		   extra bit to the minimum cost. */
		min_cost += 1.0
		bucket_limit += bucket_width
		bucket_width *= 2
	}
	return copy_len
}
/* computeDistanceShortcut returns the shortcut value for nodes[pos]: pos itself
   when the command ending at pos updates the last distances, otherwise the
   shortcut of the node at which that command started; 0 for pos == 0.

   REQUIRES: nodes[pos].cost < kInfinity
   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
func computeDistanceShortcut(block_start uint, pos uint, max_backward_limit uint, gap uint, nodes []zopfliNode) uint32 {
	node := &nodes[pos]
	copy_len := uint(zopfliNodeCopyLength(node))
	insert_len := uint(node.dcode_insert_length & 0x7FFFFFF)
	distance := uint(zopfliNodeCopyDistance(node))

	if pos == 0 {
		return 0
	}

	/* Since |block_start + pos| is the end position of the command, the copy
	   part starts from |block_start + pos - copy_len|. Distances that are
	   greater than this or greater than |max_backward_limit| + |gap| are
	   static dictionary references, and do not update the last distances.
	   Also distance code 0 (last distance) does not update the last
	   distances. */
	updates_last_distances := distance+copy_len <= block_start+pos+gap &&
		distance <= max_backward_limit+gap &&
		zopfliNodeDistanceCode(node) > 0
	if updates_last_distances {
		return uint32(pos)
	}
	return nodes[pos-copy_len-insert_len].u.shortcut
}
/* Fills in dist_cache[0..3] with the last four distances (as defined by
   Section 4. of the Spec) that would be used at (block_start + pos) if we
   used the shortest path of commands from block_start, computed from
   nodes[0..pos]. The last four distances at block_start are in
   starting_dist_cache[0..3].

   The shortcut chain is followed backwards from nodes[pos]; slots that the
   chain does not fill are taken, in order, from the front of
   starting_dist_cache.

   REQUIRES: nodes[pos].cost < kInfinity
   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
func computeDistanceCache(pos uint, starting_dist_cache []int, nodes []zopfliNode, dist_cache []int) {
	filled := 0
	for p := uint(nodes[pos].u.shortcut); filled < 4 && p > 0; filled++ {
		node := &nodes[p]
		insert_len := uint(node.dcode_insert_length & 0x7FFFFFF)
		copy_len := uint(zopfliNodeCopyLength(node))
		dist_cache[filled] = int(uint(zopfliNodeCopyDistance(node)))

		/* Because of prerequisite, p >= copy_len + insert_len >= 2. */
		p = uint(nodes[p-copy_len-insert_len].u.shortcut)
	}

	for src := 0; filled < 4; filled, src = filled+1, src+1 {
		dist_cache[filled] = starting_dist_cache[src]
	}
}
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
   is eligible. A node is eligible when its cost does not exceed the cost of
   coding the bytes in [0, pos) as literals. */
func evaluateNode(block_start uint, pos uint, max_backward_limit uint, gap uint, starting_dist_cache []int, model *zopfliCostModel, queue *startPosQueue, nodes []zopfliNode) {
	/* The cost is read before the shortcut of the same node is written. */
	node_cost := nodes[pos].u.cost
	nodes[pos].u.shortcut = computeDistanceShortcut(block_start, pos, max_backward_limit, gap, nodes)

	literal_cost := zopfliCostModelGetLiteralCosts(model, 0, pos)
	if !(node_cost <= literal_cost) {
		return
	}

	var candidate posData
	candidate.pos = pos
	candidate.cost = node_cost
	candidate.costdiff = node_cost - literal_cost
	computeDistanceCache(pos, starting_dist_cache, nodes, candidate.distance_cache[:])
	startPosQueuePush(queue, &candidate)
}
/* updateNodes evaluates the node at pos and then tries to lower the cost of the
   nodes reachable by a single command whose copy starts at pos.

   For up to maxZopfliCandidates(params) start positions taken from queue (in
   order of increasing cost difference) it tries:
     - copies at the 16 short-code distances derived from that start
       position's distance cache, and
     - for the first two start positions only, the first num_matches entries
       of matches.
   Every improvement is written with updateZopfliNode.

   Returns longest copy length for which a node was updated (0 if none). */
func updateNodes(num_bytes uint, block_start uint, pos uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, max_backward_limit uint, starting_dist_cache []int, num_matches uint, matches []backwardMatch, model *zopfliCostModel, queue *startPosQueue, nodes []zopfliNode) uint {
	var cur_ix uint = block_start + pos
	var cur_ix_masked uint = cur_ix & ringbuffer_mask
	var max_distance uint = brotli_min_size_t(cur_ix, max_backward_limit)
	var max_len uint = num_bytes - pos
	var max_zopfli_len uint = maxZopfliLen(params)
	var max_iters uint = maxZopfliCandidates(params)
	var min_len uint
	var result uint = 0
	var k uint
	var gap uint = 0

	evaluateNode(block_start, pos, max_backward_limit, gap, starting_dist_cache, model, queue, nodes)
	{
		/* Lower bound on the cost of any command starting here, based on the
		   cheapest queued start position. */
		var posdata *posData = startPosQueueAt(queue, 0)
		var min_cost float32 = (posdata.cost + zopfliCostModelGetMinCostCmd(model) + zopfliCostModelGetLiteralCosts(model, posdata.pos, pos))
		min_len = computeMinimumCopyLength(min_cost, nodes, num_bytes, pos)
	}

	/* Go over the command starting positions in order of increasing cost
	   difference. */
	for k = 0; k < max_iters && k < startPosQueueSize(queue); k++ {
		var posdata *posData = startPosQueueAt(queue, k)
		var start uint = posdata.pos
		var inscode uint16 = getInsertLengthCode(pos - start)
		var start_costdiff float32 = posdata.costdiff
		var base_cost float32 = start_costdiff + float32(getInsertExtra(inscode)) + zopfliCostModelGetLiteralCosts(model, 0, pos)
		var best_len uint = min_len - 1
		var j uint = 0

		/* Look for last distance matches using the distance cache from this
		   starting position. */
		for ; j < numDistanceShortCodes && best_len < max_len; j++ {
			var idx uint = uint(kDistanceCacheIndex[j])
			var backward uint = uint(posdata.distance_cache[idx] + kDistanceCacheOffset[j])
			var prev_ix uint = cur_ix - backward
			var len uint = 0

			/* Check the index before using it: an out-of-range slice index
			   panics in Go, so the byte must only be read once the guard has
			   passed. */
			if cur_ix_masked+best_len > ringbuffer_mask {
				break
			}
			var continuation byte = ringbuffer[cur_ix_masked+best_len]

			if backward > max_distance+gap {
				/* Word dictionary -> ignore. */
				continue
			}

			if backward <= max_distance {
				/* Regular backward reference. */
				if prev_ix >= cur_ix {
					continue
				}

				prev_ix &= ringbuffer_mask
				/* Cheap reject: a match longer than best_len must agree on
				   the byte at offset best_len. */
				if prev_ix+best_len > ringbuffer_mask || continuation != ringbuffer[prev_ix+best_len] {
					continue
				}

				len = findMatchLengthWithLimit(ringbuffer[prev_ix:], ringbuffer[cur_ix_masked:], max_len)
			} else {
				continue
			}
			{
				var dist_cost float32 = base_cost + zopfliCostModelGetDistanceCost(model, j)
				var l uint
				for l = best_len + 1; l <= len; l++ {
					var copycode uint16 = getCopyLengthCode(l)
					var cmdcode uint16 = combineLengthCodes(inscode, copycode, j == 0)
					var tmp float32
					/* Command codes below 128 are charged no distance cost. */
					if cmdcode < 128 {
						tmp = base_cost
					} else {
						tmp = dist_cost
					}
					var cost float32 = tmp + float32(getCopyExtra(copycode)) + zopfliCostModelGetCommandCost(model, cmdcode)
					if cost < nodes[pos+l].u.cost {
						updateZopfliNode(nodes, pos, start, l, l, backward, j+1, cost)
						result = brotli_max_size_t(result, l)
					}

					best_len = l
				}
			}
		}

		/* At higher iterations look only for new last distance matches, since
		   looking only for new command start positions with the same distances
		   does not help much. */
		if k >= 2 {
			continue
		}
		{
			/* Loop through all possible copy lengths at this position. */
			var len uint = min_len
			for j = 0; j < num_matches; j++ {
				var match backwardMatch = matches[j]
				var dist uint = uint(match.distance)
				var is_dictionary_match bool = (dist > max_distance+gap)
				var dist_code uint = dist + numDistanceShortCodes - 1
				var dist_symbol uint16
				var distextra uint32
				var distnumextra uint32
				var dist_cost float32
				var max_match_len uint

				/* We already tried all possible last distance matches, so we can use
				   normal distance code here. */
				prefixEncodeCopyDistance(dist_code, uint(params.dist.num_direct_distance_codes), uint(params.dist.distance_postfix_bits), &dist_symbol, &distextra)

				/* The bits of dist_symbol above bit 9 give the number of extra
				   bits; the low 10 bits are the distance symbol. */
				distnumextra = uint32(dist_symbol) >> 10
				dist_cost = base_cost + float32(distnumextra) + zopfliCostModelGetDistanceCost(model, uint(dist_symbol)&0x3FF)

				/* Try all copy lengths up until the maximum copy length corresponding
				   to this distance. If the distance refers to the static dictionary, or
				   the maximum length is long enough, try only one maximum length. */
				max_match_len = backwardMatchLength(&match)

				if len < max_match_len && (is_dictionary_match || max_match_len > max_zopfli_len) {
					len = max_match_len
				}

				for ; len <= max_match_len; len++ {
					var len_code uint
					if is_dictionary_match {
						len_code = backwardMatchLengthCode(&match)
					} else {
						len_code = len
					}
					var copycode uint16 = getCopyLengthCode(len_code)
					var cmdcode uint16 = combineLengthCodes(inscode, copycode, false)
					var cost float32 = dist_cost + float32(getCopyExtra(copycode)) + zopfliCostModelGetCommandCost(model, cmdcode)
					if cost < nodes[pos+len].u.cost {
						updateZopfliNode(nodes, pos, start, uint(len), len_code, dist, 0, cost)
						if len > result {
							result = len
						}
					}
				}
			}
		}
	}

	return result
}
/* computeShortestPathFromNodes walks the node array backwards from num_bytes
   and links the chosen path forwards through u.next.

   Trailing nodes that still look like the stub written by initZopfliNodes
   (no insert length, length == 1) are skipped first. The last node of the
   path gets u.next = math.MaxUint32; every node where a command starts gets
   the length of that command. Returns the number of commands on the path. */
func computeShortestPathFromNodes(num_bytes uint, nodes []zopfliNode) uint {
	const insert_length_mask = 0x7FFFFFF

	tail := num_bytes
	for nodes[tail].dcode_insert_length&insert_length_mask == 0 && nodes[tail].length == 1 {
		tail--
	}
	nodes[tail].u.next = math.MaxUint32

	command_count := uint(0)
	for cursor := tail; cursor != 0; command_count++ {
		step := uint(zopfliNodeCommandLength(&nodes[cursor]))
		cursor -= step
		nodes[cursor].u.next = uint32(step)
	}
	return command_count
}
/* zopfliCreateCommands follows the u.next links written by
   computeShortestPathFromNodes and appends one command per path step to
   *commands.

   The first command additionally absorbs *last_insert_len, which is then
   reset. dist_cache is rotated for every command that is not a dictionary
   reference and has a non-zero distance code. *num_literals grows by each
   command's insert length, and the bytes after the last command are added to
   *last_insert_len.

   REQUIRES: nodes != nil and len(nodes) >= num_bytes + 1 */
func zopfliCreateCommands(num_bytes uint, block_start uint, nodes []zopfliNode, dist_cache []int, last_insert_len *uint, params *encoderParams, commands *[]command, num_literals *uint) {
	max_backward_limit := maxBackwardLimit(params.lgwin)
	var gap uint = 0
	var pos uint = 0

	is_first := true
	for offset := nodes[0].u.next; offset != math.MaxUint32; is_first = false {
		node := &nodes[uint32(pos)+offset]
		copy_length := uint(zopfliNodeCopyLength(node))
		insert_length := uint(node.dcode_insert_length & 0x7FFFFFF)
		pos += insert_length
		offset = node.u.next

		if is_first {
			insert_length += *last_insert_len
			*last_insert_len = 0
		}

		distance := uint(zopfliNodeCopyDistance(node))
		len_code := uint(zopfliNodeLengthCode(node))
		max_distance := brotli_min_size_t(block_start+pos, max_backward_limit)
		dist_code := uint(zopfliNodeDistanceCode(node))
		*commands = append(*commands, makeCommand(&params.dist, insert_length, copy_length, int(len_code)-int(copy_length), dist_code))

		/* Distances beyond max_distance + gap are dictionary references. */
		within_window := distance <= max_distance+gap
		if within_window && dist_code > 0 {
			dist_cache[3] = dist_cache[2]
			dist_cache[2] = dist_cache[1]
			dist_cache[1] = dist_cache[0]
			dist_cache[0] = int(distance)
		}

		*num_literals += insert_length
		pos += copy_length
	}

	*last_insert_len += num_bytes - pos
}
/* zopfliIterate runs the node update pass over matches that were collected
beforehand: num_matches[i] gives the number of matches found at byte i, and
those matches are stored consecutively in matches. It returns the number of
commands on the resulting path, as computed by computeShortestPathFromNodes. */
func zopfliIterate(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, gap uint, dist_cache []int, model *zopfliCostModel, num_matches []uint32, matches []backwardMatch, nodes []zopfliNode) uint {
var max_backward_limit uint = maxBackwardLimit(params.lgwin)
var max_zopfli_len uint = maxZopfliLen(params)
var queue startPosQueue
/* Index into matches of the first match belonging to byte i. */
var cur_match_pos uint = 0
var i uint
/* Node 0 is the start of the path: nothing copied, zero cost. */
nodes[0].length = 0
nodes[0].u.cost = 0
initStartPosQueue(&queue)
for i = 0; i+3 < num_bytes; i++ {
var skip uint = updateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, uint(num_matches[i]), matches[cur_match_pos:], model, &queue, nodes)
/* Copies shorter than longCopyQuickStep do not trigger skipping. */
if skip < longCopyQuickStep {
skip = 0
}
cur_match_pos += uint(num_matches[i])
/* A single match longer than max_zopfli_len forces a skip of at least its length. */
if num_matches[i] == 1 && backwardMatchLength(&matches[cur_match_pos-1]) > max_zopfli_len {
skip = brotli_max_size_t(backwardMatchLength(&matches[cur_match_pos-1]), skip)
}
if skip > 1 {
skip--
/* Skipped bytes are only evaluated (no match search), while the match
cursor keeps advancing past their entries. */
for skip != 0 {
i++
if i+3 >= num_bytes {
break
}
evaluateNode(position, i, max_backward_limit, gap, dist_cache, model, &queue, nodes)
cur_match_pos += uint(num_matches[i])
skip--
}
}
}
return computeShortestPathFromNodes(num_bytes, nodes)
}
/* Computes the shortest path of commands from position to at most
position + num_bytes.
The return value is the number of commands found; the path itself is stored
in nodes through the u.next links written by computeShortestPathFromNodes.
Note that the sum of the lengths of all commands can be less than num_bytes.
On return, the nodes[0..num_bytes] array will have the following
"ZopfliNode array invariant":
For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
(1) nodes[i].copy_length() >= 2
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity
REQUIRES: nodes != nil and len(nodes) >= num_bytes + 1 */
func zopfliComputeShortestPath(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, dist_cache []int, hasher *h10, nodes []zopfliNode) uint {
var max_backward_limit uint = maxBackwardLimit(params.lgwin)
var max_zopfli_len uint = maxZopfliLen(params)
var model zopfliCostModel
var queue startPosQueue
/* Scratch buffer for the matches found at the current byte. */
var matches [2 * (maxNumMatchesH10 + 64)]backwardMatch
/* Upper bound for the range handed to hasher.StoreRange below. */
var store_end uint
if num_bytes >= hasher.StoreLookahead() {
store_end = position + num_bytes - hasher.StoreLookahead() + 1
} else {
store_end = position
}
var i uint
var gap uint = 0
var lz_matches_offset uint = 0
/* Node 0 is the start of the path: nothing copied, zero cost. */
nodes[0].length = 0
nodes[0].u.cost = 0
initZopfliCostModel(&model, &params.dist, num_bytes)
zopfliCostModelSetFromLiteralCosts(&model, position, ringbuffer, ringbuffer_mask)
initStartPosQueue(&queue)
for i = 0; i+hasher.HashTypeLength()-1 < num_bytes; i++ {
var pos uint = position + i
var max_distance uint = brotli_min_size_t(pos, max_backward_limit)
var skip uint
var num_matches uint
num_matches = findAllMatchesH10(hasher, &params.dictionary, ringbuffer, ringbuffer_mask, pos, num_bytes-i, max_distance, gap, params, matches[lz_matches_offset:])
/* If the last match found is longer than max_zopfli_len, keep only that one. */
if num_matches > 0 && backwardMatchLength(&matches[num_matches-1]) > max_zopfli_len {
matches[0] = matches[num_matches-1]
num_matches = 1
}
skip = updateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, num_matches, matches[:], &model, &queue, nodes)
/* Copies shorter than longCopyQuickStep do not trigger skipping. */
if skip < longCopyQuickStep {
skip = 0
}
if num_matches == 1 && backwardMatchLength(&matches[0]) > max_zopfli_len {
skip = brotli_max_size_t(backwardMatchLength(&matches[0]), skip)
}
if skip > 1 {
/* Add the tail of the copy to the hasher. */
hasher.StoreRange(ringbuffer, ringbuffer_mask, pos+1, brotli_min_size_t(pos+skip, store_end))
skip--
/* Skipped bytes are evaluated without searching for matches. */
for skip != 0 {
i++
if i+hasher.HashTypeLength()-1 >= num_bytes {
break
}
evaluateNode(position, i, max_backward_limit, gap, dist_cache, &model, &queue, nodes)
skip--
}
}
}
cleanupZopfliCostModel(&model)
return computeShortestPathFromNodes(num_bytes, nodes)
}
/* createZopfliBackwardReferences allocates and initialises a node array of
   num_bytes + 1 entries, computes the shortest command path over it with
   zopfliComputeShortestPath and turns that path into commands with
   zopfliCreateCommands. */
func createZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher *h10, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
	node_count := num_bytes + 1
	nodes := make([]zopfliNode, node_count)
	initZopfliNodes(nodes, node_count)

	zopfliComputeShortestPath(num_bytes, position, ringbuffer, ringbuffer_mask, params, dist_cache, hasher, nodes)
	zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals)
}
/* createHqZopfliBackwardReferences creates commands in two steps.

First it collects, for every byte of the block, the matches reported by
findAllMatchesH10 (hasher must hold a *h10; the type assertion panics
otherwise). Then it runs zopfliIterate + zopfliCreateCommands twice: the
first pass uses a cost model built from literal costs, the second pass uses a
cost model built from the commands of the first pass. Before each pass
*commands, *num_literals, *last_insert_len and dist_cache are restored to the
values they had on entry, so only the commands of the second pass remain. */
func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
var max_backward_limit uint = maxBackwardLimit(params.lgwin)
/* num_matches[i] is the number of matches kept for byte i. */
var num_matches []uint32 = make([]uint32, num_bytes)
var matches_size uint = 4 * num_bytes
/* Upper bound for the range handed to hasher.StoreRange below. */
var store_end uint
if num_bytes >= hasher.StoreLookahead() {
store_end = position + num_bytes - hasher.StoreLookahead() + 1
} else {
store_end = position
}
/* Number of matches stored in matches so far. */
var cur_match_pos uint = 0
var i uint
var orig_num_literals uint
var orig_last_insert_len uint
var orig_dist_cache [4]int
var orig_num_commands int
var model zopfliCostModel
var nodes []zopfliNode
var matches []backwardMatch = make([]backwardMatch, matches_size)
var gap uint = 0
var shadow_matches uint = 0
var new_array []backwardMatch
for i = 0; i+hasher.HashTypeLength()-1 < num_bytes; i++ {
var pos uint = position + i
var max_distance uint = brotli_min_size_t(pos, max_backward_limit)
var max_length uint = num_bytes - i
var num_found_matches uint
var cur_match_end uint
var j uint
/* Ensure that we have enough free slots. */
if matches_size < cur_match_pos+maxNumMatchesH10+shadow_matches {
var new_size uint = matches_size
if new_size == 0 {
new_size = cur_match_pos + maxNumMatchesH10 + shadow_matches
}
/* Grow by doubling until the required number of slots fits. */
for new_size < cur_match_pos+maxNumMatchesH10+shadow_matches {
new_size *= 2
}
new_array = make([]backwardMatch, new_size)
if matches_size != 0 {
copy(new_array, matches[:matches_size])
}
matches = new_array
matches_size = new_size
}
num_found_matches = findAllMatchesH10(hasher.(*h10), &params.dictionary, ringbuffer, ringbuffer_mask, pos, max_length, max_distance, gap, params, matches[cur_match_pos+shadow_matches:])
cur_match_end = cur_match_pos + num_found_matches
/* The matches of one position are expected in non-decreasing length order. */
for j = cur_match_pos; j+1 < cur_match_end; j++ {
assert(backwardMatchLength(&matches[j]) <= backwardMatchLength(&matches[j+1]))
}
num_matches[i] = uint32(num_found_matches)
if num_found_matches > 0 {
var match_len uint = backwardMatchLength(&matches[cur_match_end-1])
if match_len > maxZopfliLenQuality11 {
/* Very long match: keep only this one and skip the bytes it covers. */
var skip uint = match_len - 1
matches[cur_match_pos] = matches[cur_match_end-1]
cur_match_pos++
num_matches[i] = 1
/* Add the tail of the copy to the hasher. */
hasher.StoreRange(ringbuffer, ringbuffer_mask, pos+1, brotli_min_size_t(pos+match_len, store_end))
/* NOTE: from here on pos shadows the outer pos and holds the block-relative
index i; the inner loop variable i shadows the outer i as well. */
var pos uint = i
/* The skipped bytes have no matches of their own. */
for i := 0; i < int(skip); i++ {
num_matches[pos+1:][i] = 0
}
i += skip
} else {
cur_match_pos = cur_match_end
}
}
}
/* Remember the caller-visible state so that each pass starts from it. */
orig_num_literals = *num_literals
orig_last_insert_len = *last_insert_len
copy(orig_dist_cache[:], dist_cache[:4])
orig_num_commands = len(*commands)
nodes = make([]zopfliNode, (num_bytes + 1))
initZopfliCostModel(&model, &params.dist, num_bytes)
for i = 0; i < 2; i++ {
initZopfliNodes(nodes, num_bytes+1)
if i == 0 {
zopfliCostModelSetFromLiteralCosts(&model, position, ringbuffer, ringbuffer_mask)
} else {
/* Second pass: model the costs on the commands produced by the first pass. */
zopfliCostModelSetFromCommands(&model, position, ringbuffer, ringbuffer_mask, (*commands)[orig_num_commands:], orig_last_insert_len)
}
*commands = (*commands)[:orig_num_commands]
*num_literals = orig_num_literals
*last_insert_len = orig_last_insert_len
copy(dist_cache, orig_dist_cache[:4])
zopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches, nodes)
zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals)
}
cleanupZopfliCostModel(&model)
nodes = nil
matches = nil
num_matches = nil
}
+436
View File
@@ -0,0 +1,436 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
/* shannonEntropy computes the entropy, in bits, of the first size counts of
   population:

       sum * log2(sum) - Σ population[i] * log2(population[i])

   where sum is the total of those counts. The total is also stored in
   *total. The result is 0 when all counts are zero.

   The loop indexes the slice directly instead of comparing the capacities
   of two re-sliced views, which was a machine-translated form of a C
   pointer comparison; the terms are accumulated in the same order. */
func shannonEntropy(population []uint32, size uint, total *uint) float64 {
	var sum uint = 0
	var retval float64 = 0
	for i := uint(0); i < size; i++ {
		p := uint(population[i])
		sum += p
		retval -= float64(p) * fastLog2(p)
	}

	if sum != 0 {
		retval += float64(sum) * fastLog2(sum)
	}
	*total = sum
	return retval
}
/* bitsEntropy returns shannonEntropy(population, size), raised to the total
   symbol count when the entropy is smaller than that. */
func bitsEntropy(population []uint32, size uint) float64 {
	var total uint
	entropy := shannonEntropy(population, size, &total)

	/* At least one bit per literal is needed. */
	if floor := float64(total); entropy < floor {
		return floor
	}
	return entropy
}
/* Fixed cost terms that populationCostLiteral uses for histograms with exactly
one, two, three or four symbols that have a non-zero count. The one-symbol
cost is also returned for an empty histogram. */
const kOneSymbolHistogramCost float64 = 12
const kTwoSymbolHistogramCost float64 = 20
const kThreeSymbolHistogramCost float64 = 28
const kFourSymbolHistogramCost float64 = 37
// populationCostLiteral estimates the bit cost of a literal histogram.
//
// Histograms with at most four distinct symbols are priced with closed-form
// expressions built on the k*SymbolHistogramCost constants. Larger histograms
// are priced as the entropy of the symbols plus an estimate for transmitting
// the code lengths themselves.
func populationCostLiteral(histogram *histogramLiteral) float64 {
	alphabetSize := histogramDataSizeLiteral()
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	// Remember the first symbols that occur; a fifth hit only tells us that
	// none of the small-alphabet shortcuts applies.
	var used [5]uint
	numUsed := 0
	for sym := uint(0); sym < alphabetSize; sym++ {
		if histogram.data_[sym] == 0 {
			continue
		}
		used[numUsed] = sym
		numUsed++
		if numUsed > 4 {
			break
		}
	}

	switch numUsed {
	case 1:
		return kOneSymbolHistogramCost
	case 2:
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	case 3:
		a := histogram.data_[used[0]]
		b := histogram.data_[used[1]]
		c := histogram.data_[used[2]]
		largest := brotli_max_uint32_t(a, brotli_max_uint32_t(b, c))
		return kThreeSymbolHistogramCost + 2*(float64(a)+float64(b)+float64(c)) - float64(largest)
	case 4:
		var counts [4]uint32
		for n := range counts {
			counts[n] = histogram.data_[used[n]]
		}
		// Order the four counts from largest to smallest.
		for lo := 0; lo < 4; lo++ {
			for hi := lo + 1; hi < 4; hi++ {
				if counts[hi] > counts[lo] {
					counts[lo], counts[hi] = counts[hi], counts[lo]
				}
			}
		}
		smallestTwo := counts[2] + counts[3]
		largest := brotli_max_uint32_t(smallestTwo, counts[0])
		return kFourSymbolHistogramCost + 3*float64(smallestTwo) + 2*(float64(counts[0])+float64(counts[1])) - float64(largest)
	}

	// General case: accumulate the entropy of the histogram and, in the same
	// pass, build a simplified histogram of the code length codes that uses
	// the zero repeat code 17 but not the non-zero repeat code 16.
	var bits float64
	maxDepth := uint(1)
	var depthHisto [codeLengthCodes]uint32
	log2Total := fastLog2(histogram.total_count_)
	for i := uint(0); i < alphabetSize; {
		if n := histogram.data_[i]; n > 0 {
			// -log2(P(symbol)) = log2(total_count) - log2(count(symbol)).
			log2p := log2Total - fastLog2(uint(n))
			// Approximate the bit depth by round(-log2(P(symbol))).
			depth := uint(log2p + 0.5)
			bits += float64(n) * log2p
			if depth > 15 {
				depth = 15
			}
			if depth > maxDepth {
				maxDepth = depth
			}
			depthHisto[depth]++
			i++
			continue
		}

		// Measure the run of zero counts starting at i.
		zeros := uint32(1)
		for k := i + 1; k < alphabetSize && histogram.data_[k] == 0; k++ {
			zeros++
		}
		i += uint(zeros)
		if i == alphabetSize {
			// The trailing zero run is encoded only implicitly: no cost.
			break
		}
		if zeros < 3 {
			depthHisto[0] += zeros
			continue
		}
		for zeros -= 2; zeros > 0; zeros >>= 3 {
			depthHisto[repeatZeroCodeLength]++
			// Each code 17 carries 3 extra bits.
			bits += 3
		}
	}

	// Estimated encoding cost of the code length code histogram.
	bits += float64(18 + 2*maxDepth)
	// Entropy of the code length code histogram.
	bits += bitsEntropy(depthHisto[:], codeLengthCodes)
	return bits
}
// populationCostCommand estimates the bit cost of a command histogram.
//
// Histograms with at most four distinct symbols are priced with closed-form
// expressions built on the k*SymbolHistogramCost constants. Larger histograms
// are priced as the entropy of the symbols plus an estimate for transmitting
// the code lengths themselves.
func populationCostCommand(histogram *histogramCommand) float64 {
	alphabetSize := histogramDataSizeCommand()
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	// Remember the first symbols that occur; a fifth hit only tells us that
	// none of the small-alphabet shortcuts applies.
	var used [5]uint
	numUsed := 0
	for sym := uint(0); sym < alphabetSize; sym++ {
		if histogram.data_[sym] == 0 {
			continue
		}
		used[numUsed] = sym
		numUsed++
		if numUsed > 4 {
			break
		}
	}

	switch numUsed {
	case 1:
		return kOneSymbolHistogramCost
	case 2:
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	case 3:
		a := histogram.data_[used[0]]
		b := histogram.data_[used[1]]
		c := histogram.data_[used[2]]
		largest := brotli_max_uint32_t(a, brotli_max_uint32_t(b, c))
		return kThreeSymbolHistogramCost + 2*(float64(a)+float64(b)+float64(c)) - float64(largest)
	case 4:
		var counts [4]uint32
		for n := range counts {
			counts[n] = histogram.data_[used[n]]
		}
		// Order the four counts from largest to smallest.
		for lo := 0; lo < 4; lo++ {
			for hi := lo + 1; hi < 4; hi++ {
				if counts[hi] > counts[lo] {
					counts[lo], counts[hi] = counts[hi], counts[lo]
				}
			}
		}
		smallestTwo := counts[2] + counts[3]
		largest := brotli_max_uint32_t(smallestTwo, counts[0])
		return kFourSymbolHistogramCost + 3*float64(smallestTwo) + 2*(float64(counts[0])+float64(counts[1])) - float64(largest)
	}

	// General case: accumulate the entropy of the histogram and, in the same
	// pass, build a simplified histogram of the code length codes that uses
	// the zero repeat code 17 but not the non-zero repeat code 16.
	var bits float64
	maxDepth := uint(1)
	var depthHisto [codeLengthCodes]uint32
	log2Total := fastLog2(histogram.total_count_)
	for i := uint(0); i < alphabetSize; {
		if n := histogram.data_[i]; n > 0 {
			// -log2(P(symbol)) = log2(total_count) - log2(count(symbol)).
			log2p := log2Total - fastLog2(uint(n))
			// Approximate the bit depth by round(-log2(P(symbol))).
			depth := uint(log2p + 0.5)
			bits += float64(n) * log2p
			if depth > 15 {
				depth = 15
			}
			if depth > maxDepth {
				maxDepth = depth
			}
			depthHisto[depth]++
			i++
			continue
		}

		// Measure the run of zero counts starting at i.
		zeros := uint32(1)
		for k := i + 1; k < alphabetSize && histogram.data_[k] == 0; k++ {
			zeros++
		}
		i += uint(zeros)
		if i == alphabetSize {
			// The trailing zero run is encoded only implicitly: no cost.
			break
		}
		if zeros < 3 {
			depthHisto[0] += zeros
			continue
		}
		for zeros -= 2; zeros > 0; zeros >>= 3 {
			depthHisto[repeatZeroCodeLength]++
			// Each code 17 carries 3 extra bits.
			bits += 3
		}
	}

	// Estimated encoding cost of the code length code histogram.
	bits += float64(18 + 2*maxDepth)
	// Entropy of the code length code histogram.
	bits += bitsEntropy(depthHisto[:], codeLengthCodes)
	return bits
}
// populationCostDistance estimates the bit cost of a distance histogram.
//
// Histograms with at most four distinct symbols are priced with closed-form
// expressions built on the k*SymbolHistogramCost constants. Larger histograms
// are priced as the entropy of the symbols plus an estimate for transmitting
// the code lengths themselves.
func populationCostDistance(histogram *histogramDistance) float64 {
	alphabetSize := histogramDataSizeDistance()
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	// Remember the first symbols that occur; a fifth hit only tells us that
	// none of the small-alphabet shortcuts applies.
	var used [5]uint
	numUsed := 0
	for sym := uint(0); sym < alphabetSize; sym++ {
		if histogram.data_[sym] == 0 {
			continue
		}
		used[numUsed] = sym
		numUsed++
		if numUsed > 4 {
			break
		}
	}

	switch numUsed {
	case 1:
		return kOneSymbolHistogramCost
	case 2:
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	case 3:
		a := histogram.data_[used[0]]
		b := histogram.data_[used[1]]
		c := histogram.data_[used[2]]
		largest := brotli_max_uint32_t(a, brotli_max_uint32_t(b, c))
		return kThreeSymbolHistogramCost + 2*(float64(a)+float64(b)+float64(c)) - float64(largest)
	case 4:
		var counts [4]uint32
		for n := range counts {
			counts[n] = histogram.data_[used[n]]
		}
		// Order the four counts from largest to smallest.
		for lo := 0; lo < 4; lo++ {
			for hi := lo + 1; hi < 4; hi++ {
				if counts[hi] > counts[lo] {
					counts[lo], counts[hi] = counts[hi], counts[lo]
				}
			}
		}
		smallestTwo := counts[2] + counts[3]
		largest := brotli_max_uint32_t(smallestTwo, counts[0])
		return kFourSymbolHistogramCost + 3*float64(smallestTwo) + 2*(float64(counts[0])+float64(counts[1])) - float64(largest)
	}

	// General case: accumulate the entropy of the histogram and, in the same
	// pass, build a simplified histogram of the code length codes that uses
	// the zero repeat code 17 but not the non-zero repeat code 16.
	var bits float64
	maxDepth := uint(1)
	var depthHisto [codeLengthCodes]uint32
	log2Total := fastLog2(histogram.total_count_)
	for i := uint(0); i < alphabetSize; {
		if n := histogram.data_[i]; n > 0 {
			// -log2(P(symbol)) = log2(total_count) - log2(count(symbol)).
			log2p := log2Total - fastLog2(uint(n))
			// Approximate the bit depth by round(-log2(P(symbol))).
			depth := uint(log2p + 0.5)
			bits += float64(n) * log2p
			if depth > 15 {
				depth = 15
			}
			if depth > maxDepth {
				maxDepth = depth
			}
			depthHisto[depth]++
			i++
			continue
		}

		// Measure the run of zero counts starting at i.
		zeros := uint32(1)
		for k := i + 1; k < alphabetSize && histogram.data_[k] == 0; k++ {
			zeros++
		}
		i += uint(zeros)
		if i == alphabetSize {
			// The trailing zero run is encoded only implicitly: no cost.
			break
		}
		if zeros < 3 {
			depthHisto[0] += zeros
			continue
		}
		for zeros -= 2; zeros > 0; zeros >>= 3 {
			depthHisto[repeatZeroCodeLength]++
			// Each code 17 carries 3 extra bits.
			bits += 3
		}
	}

	// Estimated encoding cost of the code length code histogram.
	bits += float64(18 + 2*maxDepth)
	// Entropy of the code length code histogram.
	bits += bitsEntropy(depthHisto[:], codeLengthCodes)
	return bits
}
+266
View File
@@ -0,0 +1,266 @@
package brotli
import "encoding/binary"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Bit reading helpers */
// shortFillBitWindowRead evaluates to 4 (8 >> 1).
// NOTE(review): it is not referenced in this part of the file; the name
// mirrors BROTLI_SHORT_FILL_BIT_WINDOW_READ from the C sources - confirm usage.
const shortFillBitWindowRead = (8 >> 1)
// kBitMask[n] has the n lowest bits set, for n in 0..32.
var kBitMask = [33]uint32{
0x00000000,
0x00000001,
0x00000003,
0x00000007,
0x0000000F,
0x0000001F,
0x0000003F,
0x0000007F,
0x000000FF,
0x000001FF,
0x000003FF,
0x000007FF,
0x00000FFF,
0x00001FFF,
0x00003FFF,
0x00007FFF,
0x0000FFFF,
0x0001FFFF,
0x0003FFFF,
0x0007FFFF,
0x000FFFFF,
0x001FFFFF,
0x003FFFFF,
0x007FFFFF,
0x00FFFFFF,
0x01FFFFFF,
0x03FFFFFF,
0x07FFFFFF,
0x0FFFFFFF,
0x1FFFFFFF,
0x3FFFFFFF,
0x7FFFFFFF,
0xFFFFFFFF,
}
// bitMask returns a mask with the n lowest bits set, looked up in kBitMask.
// n must be in the range 0..32; larger values index past the table.
func bitMask(n uint32) uint32 {
	mask := kBitMask[n]
	return mask
}
// bitReader reads bits from a byte slice through a 64-bit accumulator.
type bitReader struct {
val_ uint64 // bit accumulator; the unread bits are val_ >> bit_pos_
bit_pos_ uint32 // number of accumulator bits already consumed (64 means empty)
input []byte // buffer the accumulator is refilled from
input_len uint // position at which pullByte treats the input as exhausted
byte_pos uint // index in input of the next byte to load into val_
}
// bitReaderState is a snapshot of a bitReader. It has the same fields and is
// filled by bitReaderSaveState and applied by bitReaderRestoreState.
type bitReaderState struct {
val_ uint64 // saved bit accumulator
bit_pos_ uint32 // saved count of consumed accumulator bits
input []byte // saved input buffer (the slice header is copied, not the bytes)
input_len uint // saved input length
byte_pos uint // saved index of the next input byte
}
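/* Note: the two descriptions below belong to initBitReader and
warmupBitReader, which are defined at the end of this file; they do not
describe the function that follows.

initBitReader: initializes the BrotliBitReader fields.

warmupBitReader: ensures that accumulator is not empty.
May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
Returns false if data is required but there is no input available.
For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
reading. */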
// bitReaderSaveState copies every field of the reader from into the snapshot
// to. bitReader and bitReaderState declare identical fields, so a struct
// conversion copies all of them at once.
func bitReaderSaveState(from *bitReader, to *bitReaderState) {
	*to = bitReaderState(*from)
}
// bitReaderRestoreState overwrites every field of the reader to with the
// values held in the snapshot from. bitReader and bitReaderState declare
// identical fields, so a struct conversion copies all of them at once.
func bitReaderRestoreState(to *bitReader, from *bitReaderState) {
	*to = bitReader(*from)
}
// getAvailableBits returns how many unread bits remain in the 64-bit
// accumulator.
func getAvailableBits(br *bitReader) uint32 {
	const accumulatorBits = 64
	return accumulatorBits - br.bit_pos_
}
// getRemainingBytes returns the number of unread bytes the bit reader still
// holds: the input bytes not yet loaded plus the whole bytes left in br.val_.
//
// The sum is computed in uint. The previous version converted the unread
// input length to uint32 first, which silently truncated the result once
// 4 GiB or more of input remained.
func getRemainingBytes(br *bitReader) uint {
	unreadInput := br.input_len - br.byte_pos
	bufferedBytes := uint(getAvailableBits(br) >> 3)
	return unreadInput + bufferedBytes
}
// checkInputAmount reports whether at least num bytes of input are still
// unread, not counting the bits that remain in br.val_.
func checkInputAmount(br *bitReader, num uint) bool {
	unread := br.input_len - br.byte_pos
	return unread >= num
}
// fillBitWindow refills the accumulator with four more input bytes once at
// least 32 of its bits have been consumed.
//
// Precondition: the accumulator contains at least 1 bit (bit_pos_ < 64).
// n_bits is not consulted by this implementation; per the original C
// documentation it should be in the range [1..24].
func fillBitWindow(br *bitReader, n_bits uint32) {
	if br.bit_pos_ < 32 {
		return
	}
	br.val_ >>= 32
	// For bit_pos_ in [32, 64) clearing bit 5 is the same as subtracting 32.
	br.bit_pos_ ^= 32
	next := binary.LittleEndian.Uint32(br.input[br.byte_pos:])
	br.val_ |= uint64(next) << 32
	br.byte_pos += 4
}
// fillBitWindow16 is the 16-bit flavour of fillBitWindow from the C sources.
// Here it simply forwards to fillBitWindow with a request of 17 bits.
func fillBitWindow16(br *bitReader) {
	const requestedBits = 17
	fillBitWindow(br, requestedBits)
}
// pullByte moves one input byte into the top of the accumulator.
// It returns false, leaving the reader untouched, when no input is left.
func pullByte(br *bitReader) bool {
	if br.byte_pos != br.input_len {
		br.val_ >>= 8
		br.val_ |= uint64(br.input[br.byte_pos]) << 56
		br.bit_pos_ -= 8
		br.byte_pos++
		return true
	}
	return false
}
// getBitsUnmasked returns the accumulator shifted so that the next unread bit
// is bit 0. How many of the returned bits are valid is given by
// getAvailableBits.
func getBitsUnmasked(br *bitReader) uint64 {
	consumed := br.bit_pos_
	return br.val_ >> consumed
}
// get16BitsUnmasked refills the accumulator and returns its unread bits
// truncated to 32 bits, without masking. Per the original documentation the
// result contains at least 16 valid bits.
func get16BitsUnmasked(br *bitReader) uint32 {
	fillBitWindow(br, 16)
	window := br.val_ >> br.bit_pos_
	return uint32(window)
}
// getBits refills the accumulator and returns the next n_bits bits without
// advancing the bit position.
func getBits(br *bitReader, n_bits uint32) uint32 {
	fillBitWindow(br, n_bits)
	window := uint32(getBitsUnmasked(br))
	return window & bitMask(n_bits)
}
// safeGetBits peeks at the next n_bits bits, pulling input one byte at a time
// until enough bits are buffered. It returns false, without writing *val,
// when the input runs out first.
func safeGetBits(br *bitReader, n_bits uint32, val *uint32) bool {
	for {
		if getAvailableBits(br) >= n_bits {
			break
		}
		if !pullByte(br) {
			return false
		}
	}
	window := uint32(getBitsUnmasked(br))
	*val = window & bitMask(n_bits)
	return true
}
// dropBits marks the next n_bits bits of the accumulator as consumed.
func dropBits(br *bitReader, n_bits uint32) {
	br.bit_pos_ = br.bit_pos_ + n_bits
}
// bitReaderUnload hands the whole unread bytes held in the accumulator back
// to the input: byte_pos is rewound by that many bytes and the accumulator
// and bit position are adjusted to drop them.
func bitReaderUnload(br *bitReader) {
	wholeBytes := getAvailableBits(br) >> 3
	bitCount := wholeBytes << 3
	br.byte_pos -= uint(wholeBytes)
	switch bitCount {
	case 64:
		// The accumulator held only unread bytes; nothing remains.
		br.val_ = 0
	default:
		br.val_ <<= bitCount
	}
	br.bit_pos_ += bitCount
}
// takeBits stores the next n_bits bits in *val and advances the bit position.
// Precondition: the accumulator MUST contain at least n_bits bits.
func takeBits(br *bitReader, n_bits uint32, val *uint32) {
	window := uint32(br.val_ >> br.bit_pos_)
	*val = window & bitMask(n_bits)
	br.bit_pos_ += n_bits
}
// readBits refills the accumulator, then returns the next n_bits bits and
// advances the bit position. It assumes enough input is available for
// fillBitWindow.
func readBits(br *bitReader, n_bits uint32) uint32 {
	fillBitWindow(br, n_bits)
	result := uint32(getBitsUnmasked(br)) & bitMask(n_bits)
	dropBits(br, n_bits)
	return result
}
// safeReadBits reads n_bits bits into *val and advances the bit position,
// pulling input one byte at a time as needed. It returns false, without
// writing *val, when the input runs out first. n_bits MUST be positive.
func safeReadBits(br *bitReader, n_bits uint32, val *uint32) bool {
	for {
		if getAvailableBits(br) >= n_bits {
			break
		}
		if !pullByte(br) {
			return false
		}
	}
	takeBits(br, n_bits, val)
	return true
}
// bitReaderJumpToByteBoundary skips the bits up to the next byte boundary
// and reports whether all skipped bits were zero.
func bitReaderJumpToByteBoundary(br *bitReader) bool {
	padCount := getAvailableBits(br) & 0x7
	if padCount == 0 {
		// Already aligned: nothing to skip.
		return true
	}
	var padding uint32
	takeBits(br, padCount, &padding)
	return padding == 0
}
// copyBytes copies num unread bytes to dest: first the whole bytes still held
// in the accumulator, then the rest straight from the input buffer.
// num may not be larger than getRemainingBytes, and the bit reader must be
// warmed up again afterwards.
func copyBytes(dest []byte, br *bitReader, num uint) {
	for ; num > 0 && getAvailableBits(br) >= 8; num-- {
		dest[0] = byte(getBitsUnmasked(br))
		dropBits(br, 8)
		dest = dest[1:]
	}
	pending := br.input[br.byte_pos:][:num]
	copy(dest, pending)
	br.byte_pos += num
}
// initBitReader resets the accumulator to the empty state: no buffered value
// and all 64 bits marked as consumed. The input fields are left untouched.
func initBitReader(br *bitReader) {
	br.val_, br.bit_pos_ = 0, 64
}
// warmupBitReader makes sure the accumulator is not empty, pulling one input
// byte if it is. It returns false when a byte is needed but no input is left.
func warmupBitReader(br *bitReader) bool {
	/* Fixing alignment after unaligned BrotliFillWindow would result accumulator
	   overflow. If unalignment is caused by BrotliSafeReadBits, then there is
	   enough space in accumulator to fix alignment. */
	return getAvailableBits(br) != 0 || pullByte(br)
}
+56
View File
@@ -0,0 +1,56 @@
package brotli
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
// bitWriter accumulates bits and appends them to dst.
type bitWriter struct {
dst []byte // bytes written so far; writeBits and jumpToByteBoundary append to it
// Data waiting to be written is the low nbits of bits.
bits uint64
nbits uint // number of pending bits held in bits
}
// writeBits queues the low nb bits of b behind the bits already pending.
// Once 32 or more bits are pending, the low 32 are appended to w.dst in
// little-endian byte order.
func (w *bitWriter) writeBits(nb uint, b uint64) {
	w.bits |= b << w.nbits
	w.nbits += nb
	if w.nbits < 32 {
		return
	}
	word := uint32(w.bits)
	w.bits >>= 32
	w.nbits -= 32
	w.dst = append(w.dst, byte(word), byte(word>>8), byte(word>>16), byte(word>>24))
}
// writeSingleBit writes one bit: 1 when bit is true, 0 otherwise.
func (w *bitWriter) writeSingleBit(bit bool) {
	var value uint64
	if bit {
		value = 1
	}
	w.writeBits(1, value)
}
// jumpToByteBoundary flushes all pending bits to w.dst one byte at a time.
// A final partial byte is written with its unused high bits as they are in
// the accumulator. Afterwards no bits are pending.
func (w *bitWriter) jumpToByteBoundary() {
	out, acc, left := w.dst, w.bits, w.nbits
	for left != 0 {
		out = append(out, byte(acc))
		acc >>= 8
		if left <= 8 { // Avoid underflow
			left = 0
		} else {
			left -= 8
		}
	}
	w.dst, w.bits, w.nbits = out, 0, 0
}
+144
View File
@@ -0,0 +1,144 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Block split point selection utilities. */
// blockSplit describes how a sequence of symbols is cut into consecutive
// blocks, each tagged with a block type.
type blockSplit struct {
num_types uint // number of distinct block types in use
num_blocks uint // number of valid entries in types and lengths
types []byte // block type of each block
lengths []uint32 // length of each block, in symbols
types_alloc_size uint // capacity bookkeeping passed to brotli_ensure_capacity_uint8_t for types
lengths_alloc_size uint // capacity bookkeeping passed to brotli_ensure_capacity_uint32_t for lengths
}
// Tuning constants for block splitting. splitBlock passes them to the
// splitByteVector* functions as the symbols-per-histogram divisor, the
// histogram cap, the sampling stride and the block switch cost.
// Note that splitBlock reuses the command cap and stride for distances.
const (
kMaxLiteralHistograms uint = 100
kMaxCommandHistograms uint = 50
kLiteralBlockSwitchCost float64 = 28.1
kCommandBlockSwitchCost float64 = 13.5
kDistanceBlockSwitchCost float64 = 14.6
kLiteralStrideLength uint = 70
kCommandStrideLength uint = 40
kSymbolsPerLiteralHistogram uint = 544
kSymbolsPerCommandHistogram uint = 530
kSymbolsPerDistanceHistogram uint = 544
// Inputs shorter than this are emitted as a single block.
kMinLengthForBlockSplitting uint = 128
// Refinement runs kIterMulForRefining*length/stride + kMinItersForRefining
// sampling iterations (rounded up to a multiple of the histogram count).
kIterMulForRefining uint = 2
kMinItersForRefining uint = 100
)
// countLiterals returns the sum of the insert lengths of all commands, i.e.
// the total number of literal bytes they insert.
func countLiterals(cmds []command) uint {
	var total uint
	for i := 0; i < len(cmds); i++ {
		total += uint(cmds[i].insert_len_)
	}
	return total
}
// copyLiteralsToByteArray gathers the literal bytes of every command into
// literals, back to back. data is read as a ring buffer of mask+1 bytes
// starting at offset&mask: an insert run that crosses the end of the buffer
// is copied in two pieces, and after each command the read position skips
// that command's copy length.
func copyLiteralsToByteArray(cmds []command, data []byte, offset uint, mask uint, literals []byte) {
	dst := uint(0)
	src := offset & mask
	for i := range cmds {
		cmd := &cmds[i]
		n := uint(cmd.insert_len_)
		if src+n > mask {
			// The run wraps: copy the part up to the end of the buffer first.
			head := mask + 1 - src
			copy(literals[dst:], data[src:][:head])
			src = 0
			dst += head
			n -= head
		}
		if n > 0 {
			copy(literals[dst:], data[src:][:n])
			dst += n
		}
		src = uint((uint32(src+n) + commandCopyLen(cmd)) & uint32(mask))
	}
}
// myRand advances the pseudo-random state *seed by multiplying it by 16807
// (wrapping at 32 bits) and returns the new state.
func myRand(seed *uint32) uint32 {
	/* Initial seed should be 7. In this case, loop length is (1 << 29). */
	next := *seed * 16807
	*seed = next
	return next
}
// bitCost returns fastLog2(count), except that a count of zero yields the
// fixed value -2.0.
func bitCost(count uint) float64 {
	if count != 0 {
		return fastLog2(count)
	}
	return -2.0
}
// histogramsPerBatch is the number of block histograms that
// clusterBlocksCommand combines in one batch; clustersPerBatch is used there
// to size the expected number of clusters per batch.
const histogramsPerBatch = 64
const clustersPerBatch = 16
// initBlockSplit resets self to an empty split. The types and lengths slices
// are truncated to length zero (their backing arrays are kept) and both
// allocation counters are set to zero.
func initBlockSplit(self *blockSplit) {
	self.num_types, self.num_blocks = 0, 0
	self.types = self.types[:0]
	self.lengths = self.lengths[:0]
	self.types_alloc_size, self.lengths_alloc_size = 0, 0
}
// splitBlock computes three independent block splits for a run of commands:
// one over the literal bytes, one over the command prefix codes and one over
// the distance prefix codes.
func splitBlock(cmds []command, data []byte, pos uint, mask uint, params *encoderParams, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit) {
	// Literals: gather them into one continuous array, then split it.
	// Literal histograms have alphabet size 256.
	numLiterals := countLiterals(cmds)
	literals := make([]byte, numLiterals)
	copyLiteralsToByteArray(cmds, data, pos, mask, literals)
	splitByteVectorLiteral(literals, numLiterals, kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, kLiteralStrideLength, kLiteralBlockSwitchCost, params, literal_split)

	// Commands: split the sequence of insert-and-copy prefix codes.
	cmdPrefixes := make([]uint16, len(cmds))
	for i := range cmds {
		cmdPrefixes[i] = cmds[i].cmd_prefix_
	}
	/* TODO: reuse for distances? */
	splitByteVectorCommand(cmdPrefixes, kSymbolsPerCommandHistogram, kMaxCommandHistograms, kCommandStrideLength, kCommandBlockSwitchCost, params, insert_and_copy_split)

	// Distances: only commands with a non-zero copy length and a prefix of at
	// least 128 contribute a distance prefix.
	distPrefixes := make([]uint16, len(cmds))
	numDist := uint(0)
	for i := range cmds {
		cmd := &cmds[i]
		if commandCopyLen(cmd) == 0 || cmd.cmd_prefix_ < 128 {
			continue
		}
		distPrefixes[numDist] = cmd.dist_prefix_ & 0x3FF
		numDist++
	}
	splitByteVectorDistance(distPrefixes, numDist, kSymbolsPerDistanceHistogram, kMaxCommandHistograms, kCommandStrideLength, kDistanceBlockSwitchCost, params, dist_split)
}
+434
View File
@@ -0,0 +1,434 @@
package brotli
import "math"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// initialEntropyCodesCommand seeds num_histograms histograms, each from one
// stride-long sample of data. Sample i starts near length*i/num_histograms;
// every sample but the first is shifted by a pseudo-random offset below
// length/num_histograms, and a sample that would reach the end of data is
// moved to start at length-stride-1.
// The caller must ensure num_histograms <= length and stride < length.
func initialEntropyCodesCommand(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramCommand) {
	seed := uint32(7)
	blockLen := length / num_histograms
	clearHistogramsCommand(histograms, num_histograms)
	for h := uint(0); h < num_histograms; h++ {
		start := length * h / num_histograms
		if h > 0 {
			start += uint(myRand(&seed) % uint32(blockLen))
		}
		if start+stride >= length {
			start = length - stride - 1
		}
		histogramAddVectorCommand(&histograms[h], data[start:], stride)
	}
}
// randomSampleCommand adds one sample of data to the histogram sample.
// When stride covers the whole input, the first length symbols are added and
// the seed is left untouched; otherwise stride symbols are taken from a
// pseudo-random start position in [0, length-stride].
func randomSampleCommand(seed *uint32, data []uint16, length uint, stride uint, sample *histogramCommand) {
	if stride >= length {
		histogramAddVectorCommand(sample, data[0:], length)
		return
	}
	start := uint(myRand(seed) % uint32(length-stride+1))
	histogramAddVectorCommand(sample, data[start:], stride)
}
// refineEntropyCodesCommand adds further random samples of data to the
// histograms in round-robin order. The number of samples is
// kIterMulForRefining*length/stride + kMinItersForRefining, rounded up to a
// multiple of num_histograms so that every histogram gets the same number.
func refineEntropyCodesCommand(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramCommand) {
	wanted := kIterMulForRefining*length/stride + kMinItersForRefining
	seed := uint32(7)
	rounds := (wanted + num_histograms - 1) / num_histograms
	for round := uint(0); round < rounds; round++ {
		for h := uint(0); h < num_histograms; h++ {
			var sample histogramCommand
			histogramClearCommand(&sample)
			randomSampleCommand(&seed, data, length, stride, &sample)
			histogramAddHistogramCommand(&histograms[h], &sample)
		}
	}
}
/* Assigns a block id from the range [0, num_histograms) to each data element
in data[0..length) and fills in block_id[0..length) with the assigned values.
Returns the number of blocks, i.e. one plus the number of block switches.

insert_cost, cost and switch_signal are scratch buffers supplied by the
caller; they are fully re-initialized here. They must hold at least
histogramDataSizeCommand()*num_histograms, num_histograms and
length*((num_histograms+7)>>3) elements respectively. */
func findBlocksCommand(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramCommand, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
var data_size uint = histogramDataSizeCommand()
// Number of bytes in the per-position bitmap that holds one bit per histogram.
var bitmaplen uint = (num_histograms + 7) >> 3
var num_blocks uint = 1
var i uint
var j uint
// Block ids are stored in a byte, so at most 256 histograms are supported.
assert(num_histograms <= 256)
// With a single entropy code there is nothing to choose: one block of id 0.
if num_histograms <= 1 {
for i = 0; i < length; i++ {
block_id[i] = 0
}
return 1
}
for i := 0; i < int(data_size*num_histograms); i++ {
insert_cost[i] = 0
}
// Temporarily keep fastLog2(total_count) of histogram j in insert_cost[j].
for i = 0; i < num_histograms; i++ {
insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
}
// Fill insert_cost[symbol*num_histograms+j] with the cost of coding symbol
// with histogram j. Symbols are visited in descending order because the row
// of symbol 0 overlaps the temporary totals and must be overwritten last.
for i = data_size; i != 0; {
i--
for j = 0; j < num_histograms; j++ {
insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
}
}
for i := 0; i < int(num_histograms); i++ {
cost[i] = 0
}
for i := 0; i < int(length*bitmaplen); i++ {
switch_signal[i] = 0
}
/* After each iteration of this loop, cost[k] will contain the difference
between the minimum cost of arriving at the current byte position using
entropy code k, and the minimum cost of arriving at the current byte
position. This difference is capped at the block switch cost, and if it
reaches block switch cost, it means that when we trace back from the last
position, we need to switch here. */
for i = 0; i < length; i++ {
var byte_ix uint = i
var ix uint = byte_ix * bitmaplen
var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
var min_cost float64 = 1e99
var block_switch_cost float64 = block_switch_bitcost
var k uint
for k = 0; k < num_histograms; k++ {
/* We are coding the symbol in data[byte_ix] with entropy code k. */
cost[k] += insert_cost[insert_cost_ix+k]
if cost[k] < min_cost {
min_cost = cost[k]
// Tentatively record the cheapest code for this position.
block_id[byte_ix] = byte(k)
}
}
/* More blocks for the beginning. */
if byte_ix < 2000 {
block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
}
for k = 0; k < num_histograms; k++ {
cost[k] -= min_cost
if cost[k] >= block_switch_cost {
var mask byte = byte(1 << (k & 7))
cost[k] = block_switch_cost
assert(k>>3 < bitmaplen)
// Mark that code k has to be switched away from at this position.
switch_signal[ix+(k>>3)] |= mask
}
}
}
/* Trace back from the last position and switch at the marked places. */
{
var byte_ix uint = length - 1
var ix uint = byte_ix * bitmaplen
var cur_id byte = block_id[byte_ix]
for byte_ix > 0 {
var mask byte = byte(1 << (cur_id & 7))
assert(uint(cur_id)>>3 < bitmaplen)
byte_ix--
ix -= bitmaplen
if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
// A switch is marked here; it only counts as a new block when the
// tentative id recorded for this position differs from the current one.
if cur_id != block_id[byte_ix] {
cur_id = block_id[byte_ix]
num_blocks++
}
}
block_id[byte_ix] = cur_id
}
}
return num_blocks
}
// remapBlockIdsCommand_kInvalidId marks a block id that has not been given a
// new id yet; 256 cannot collide with a real id, which always fits in a byte.
var remapBlockIdsCommand_kInvalidId uint16 = 256

// remapBlockIdsCommand renumbers the ids in block_ids[0..length) so that they
// are consecutive and ordered by first appearance, and returns how many
// distinct ids are in use. new_id is scratch space with at least
// num_histograms entries.
func remapBlockIdsCommand(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
	for h := uint(0); h < num_histograms; h++ {
		new_id[h] = remapBlockIdsCommand_kInvalidId
	}

	// First pass: hand out new ids in order of first appearance.
	var nextID uint16
	for pos := uint(0); pos < length; pos++ {
		old := block_ids[pos]
		assert(uint(old) < num_histograms)
		if new_id[old] != remapBlockIdsCommand_kInvalidId {
			continue
		}
		new_id[old] = nextID
		nextID++
	}

	// Second pass: rewrite every id through the mapping.
	for pos := uint(0); pos < length; pos++ {
		block_ids[pos] = byte(new_id[block_ids[pos]])
		assert(uint(block_ids[pos]) < num_histograms)
	}

	assert(uint(nextID) <= num_histograms)
	return uint(nextID)
}
// buildBlockHistogramsCommand clears the first num_histograms histograms and
// rebuilds them from data: symbol data[i] is counted in the histogram
// selected by block_ids[i].
func buildBlockHistogramsCommand(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramCommand) {
	clearHistogramsCommand(histograms, num_histograms)
	for pos := uint(0); pos < length; pos++ {
		target := &histograms[block_ids[pos]]
		histogramAddCommand(target, uint(data[pos]))
	}
}
// clusterBlocksCommand_kInvalidIndex marks a cluster that has not been
// assigned an output block type yet.
var clusterBlocksCommand_kInvalidIndex uint32 = math.MaxUint32
// clusterBlocksCommand turns the per-symbol block ids into the final block
// split stored in split.
//
// block_ids[0..length) must consist of exactly num_blocks runs of equal ids.
// The function
//  1. measures the length of every run,
//  2. builds one histogram per run and combines them in batches of
//     histogramsPerBatch,
//  3. combines the resulting clusters across batches, limited to
//     maxNumberOfBlockTypes,
//  4. assigns every run to the cluster with the smallest
//     histogramBitCostDistanceCommand, and
//  5. writes split.types / split.lengths, merging neighbouring runs that
//     ended up in the same cluster.
func clusterBlocksCommand(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
var histogram_symbols []uint32 = make([]uint32, num_blocks)
var block_lengths []uint32 = make([]uint32, num_blocks)
var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
var all_histograms_size uint = 0
var all_histograms_capacity uint = expected_num_clusters
var all_histograms []histogramCommand = make([]histogramCommand, all_histograms_capacity)
var cluster_size_size uint = 0
var cluster_size_capacity uint = expected_num_clusters
var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
var num_clusters uint = 0
var histograms []histogramCommand = make([]histogramCommand, brotli_min_size_t(num_blocks, histogramsPerBatch))
var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
var pairs_capacity uint = max_num_pairs + 1
var pairs []histogramPair = make([]histogramPair, pairs_capacity)
var pos uint = 0
var clusters []uint32
var num_final_clusters uint
var new_index []uint32
var i uint
var sizes = [histogramsPerBatch]uint32{0}
var new_clusters = [histogramsPerBatch]uint32{0}
var symbols = [histogramsPerBatch]uint32{0}
var remap = [histogramsPerBatch]uint32{0}
for i := 0; i < int(num_blocks); i++ {
block_lengths[i] = 0
}
// Step 1: a block ends wherever the id changes or the data ends.
{
var block_idx uint = 0
for i = 0; i < length; i++ {
assert(block_idx < num_blocks)
block_lengths[block_idx]++
if i+1 == length || block_ids[i] != block_ids[i+1] {
block_idx++
}
}
assert(block_idx == num_blocks)
}
// Step 2: cluster the blocks batch by batch.
for i = 0; i < num_blocks; i += histogramsPerBatch {
var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
var num_new_clusters uint
var j uint
// Build the histogram of each block in the batch; every block starts out
// as its own cluster of size 1.
for j = 0; j < num_to_combine; j++ {
var k uint
histogramClearCommand(&histograms[j])
for k = 0; uint32(k) < block_lengths[i+j]; k++ {
histogramAddCommand(&histograms[j], uint(data[pos]))
pos++
}
histograms[j].bit_cost_ = populationCostCommand(&histograms[j])
new_clusters[j] = uint32(j)
symbols[j] = uint32(j)
sizes[j] = 1
}
num_new_clusters = histogramCombineCommand(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
// Grow all_histograms (doubling, or to the exact size when the capacity is
// zero) so the new clusters fit.
if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
var _new_size uint
if all_histograms_capacity == 0 {
_new_size = all_histograms_size + num_new_clusters
} else {
_new_size = all_histograms_capacity
}
var new_array []histogramCommand
for _new_size < (all_histograms_size + num_new_clusters) {
_new_size *= 2
}
new_array = make([]histogramCommand, _new_size)
if all_histograms_capacity != 0 {
copy(new_array, all_histograms[:all_histograms_capacity])
}
all_histograms = new_array
all_histograms_capacity = _new_size
}
brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
// Append the batch's clusters to the global list; remap translates a
// batch-local histogram index into its position among the new clusters.
for j = 0; j < num_new_clusters; j++ {
all_histograms[all_histograms_size] = histograms[new_clusters[j]]
all_histograms_size++
cluster_size[cluster_size_size] = sizes[new_clusters[j]]
cluster_size_size++
remap[new_clusters[j]] = uint32(j)
}
// Record, for every block of the batch, the global index of its cluster.
for j = 0; j < num_to_combine; j++ {
histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
}
num_clusters += num_new_clusters
assert(num_clusters == cluster_size_size)
assert(num_clusters == all_histograms_size)
}
histograms = nil
// Step 3: combine the clusters of all batches.
max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
if pairs_capacity < max_num_pairs+1 {
pairs = nil
pairs = make([]histogramPair, (max_num_pairs + 1))
}
clusters = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
clusters[i] = uint32(i)
}
num_final_clusters = histogramCombineCommand(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
pairs = nil
cluster_size = nil
new_index = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
new_index[i] = clusterBlocksCommand_kInvalidIndex
}
pos = 0
// Step 4: re-assign every block to its closest final cluster and number
// the clusters in order of first use.
{
var next_index uint32 = 0
for i = 0; i < num_blocks; i++ {
var histo histogramCommand
var j uint
var best_out uint32
var best_bits float64
histogramClearCommand(&histo)
for j = 0; uint32(j) < block_lengths[i]; j++ {
histogramAddCommand(&histo, uint(data[pos]))
pos++
}
// Start from the cluster chosen for the previous block (or this block's
// own cluster for the first block); the strict < comparison below keeps
// this candidate on ties.
if i == 0 {
best_out = histogram_symbols[0]
} else {
best_out = histogram_symbols[i-1]
}
best_bits = histogramBitCostDistanceCommand(&histo, &all_histograms[best_out])
for j = 0; j < num_final_clusters; j++ {
var cur_bits float64 = histogramBitCostDistanceCommand(&histo, &all_histograms[clusters[j]])
if cur_bits < best_bits {
best_bits = cur_bits
best_out = clusters[j]
}
}
histogram_symbols[i] = best_out
if new_index[best_out] == clusterBlocksCommand_kInvalidIndex {
new_index[best_out] = next_index
next_index++
}
}
}
clusters = nil
all_histograms = nil
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
// Step 5: emit the split, merging consecutive blocks that share a cluster.
{
var cur_length uint32 = 0
var block_idx uint = 0
var max_type byte = 0
for i = 0; i < num_blocks; i++ {
cur_length += block_lengths[i]
if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
var id byte = byte(new_index[histogram_symbols[i]])
split.types[block_idx] = id
split.lengths[block_idx] = cur_length
max_type = brotli_max_uint8_t(max_type, id)
cur_length = 0
block_idx++
}
}
split.num_blocks = block_idx
split.num_types = uint(max_type) + 1
}
new_index = nil
block_lengths = nil
histogram_symbols = nil
}
// splitByteVectorCommand computes a block split for the command symbols in
// data and stores it in split.
//
// Empty input only sets split.num_types to 1. Input shorter than
// kMinLengthForBlockSplitting is appended to split as a single block of
// type 0. Otherwise up to max_histograms entropy codes are sampled and
// refined, block boundaries are searched for a quality-dependent number of
// passes (3 below hqZopflificationQuality, otherwise 10), and the resulting
// blocks are clustered into the final split.
func splitByteVectorCommand(data []uint16, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
	length := uint(len(data))
	symbolCount := histogramDataSizeCommand()
	numHistograms := length/literals_per_histogram + 1
	if numHistograms > max_histograms {
		numHistograms = max_histograms
	}

	switch {
	case length == 0:
		split.num_types = 1
		return
	case length < kMinLengthForBlockSplitting:
		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, split.num_blocks+1)
		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, split.num_blocks+1)
		split.num_types = 1
		split.types[split.num_blocks] = 0
		split.lengths[split.num_blocks] = uint32(length)
		split.num_blocks++
		return
	}

	/* Find good entropy codes. */
	histograms := make([]histogramCommand, numHistograms)
	initialEntropyCodesCommand(data, length, sampling_stride_length, numHistograms, histograms)
	refineEntropyCodesCommand(data, length, sampling_stride_length, numHistograms, histograms)

	// Scratch buffers, sized for the initial number of histograms; the count
	// can only shrink during the passes below.
	blockIDs := make([]byte, length)
	bitmapLen := (numHistograms + 7) >> 3
	insertCost := make([]float64, symbolCount*numHistograms)
	cost := make([]float64, numHistograms)
	switchSignal := make([]byte, length*bitmapLen)
	newID := make([]uint16, numHistograms)

	passes := uint(10)
	if params.quality < hqZopflificationQuality {
		passes = 3
	}

	/* Find a good path through literals with the good entropy codes. */
	var numBlocks uint
	for pass := uint(0); pass < passes; pass++ {
		numBlocks = findBlocksCommand(data, length, block_switch_cost, numHistograms, histograms, insertCost, cost, switchSignal, blockIDs)
		numHistograms = remapBlockIdsCommand(blockIDs, length, newID, numHistograms)
		buildBlockHistogramsCommand(data, length, blockIDs, numHistograms, histograms)
	}

	clusterBlocksCommand(data, length, numBlocks, blockIDs, split)
}
+433
View File
@@ -0,0 +1,433 @@
package brotli
import "math"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* initialEntropyCodesDistance seeds each of the num_histograms histograms
   with one window of stride symbols from data.

   Window i starts at length*i/num_histograms; every window except the first
   is shifted by a pseudo-random offset smaller than length/num_histograms
   (fixed seed 7, so the result is deterministic). A window that would reach
   the end of the input is moved back to start at length-stride-1. */
func initialEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
	rng := uint32(7)
	span := length / num_histograms

	clearHistogramsDistance(histograms, num_histograms)
	for h := uint(0); h < num_histograms; h++ {
		start := length * h / num_histograms
		if h > 0 {
			start += uint(myRand(&rng) % uint32(span))
		}
		if start+stride >= length {
			start = length - stride - 1
		}
		histogramAddVectorDistance(&histograms[h], data[start:], stride)
	}
}
/* randomSampleDistance adds one window of symbols from data to sample.

   When stride covers the whole input (stride >= length) the window is the
   first length symbols. Otherwise the window is stride symbols long and
   starts at a pseudo-random position in [0, length-stride], drawn from seed. */
func randomSampleDistance(seed *uint32, data []uint16, length uint, stride uint, sample *histogramDistance) {
	if stride >= length {
		histogramAddVectorDistance(sample, data[0:], length)
		return
	}

	offset := uint(myRand(seed) % uint32(length-stride+1))
	histogramAddVectorDistance(sample, data[offset:], stride)
}
/* refineEntropyCodesDistance refines the histograms by adding random windows
   of the input to them in round-robin order.

   The number of windows is kIterMulForRefining*length/stride +
   kMinItersForRefining, rounded up to a multiple of num_histograms so every
   histogram receives the same number of samples. The generator uses the
   fixed seed 7, so the result is deterministic. */
func refineEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
	var rounds uint = kIterMulForRefining*length/stride + kMinItersForRefining
	rounds = ((rounds + num_histograms - 1) / num_histograms) * num_histograms

	rng := uint32(7)
	for round := uint(0); round < rounds; round++ {
		var draw histogramDistance
		histogramClearDistance(&draw)
		randomSampleDistance(&rng, data, length, stride, &draw)
		histogramAddHistogramDistance(&histograms[round%num_histograms], &draw)
	}
}
/* Assigns a block id from the range [0, num_histograms) to each data element
in data[0..length) and fills in block_id[0..length) with the assigned values.
Returns the number of blocks, i.e. one plus the number of block switches.

insert_cost, cost and switch_signal are caller-provided scratch buffers. They
are reset here over the ranges that are used, which the indexing below requires
to be at least data_size*num_histograms, num_histograms and length*bitmaplen
elements respectively (bitmaplen = (num_histograms+7)>>3).

With num_histograms <= 1 every element gets block id 0 and the scratch buffers
are left untouched. */
func findBlocksDistance(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramDistance, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
var data_size uint = histogramDataSizeDistance()
/* Bytes of switch_signal per position: one bit per histogram, rounded up. */
var bitmaplen uint = (num_histograms + 7) >> 3
var num_blocks uint = 1
var i uint
var j uint
/* Block ids are stored in a byte, so more than 256 histograms cannot be
represented. */
assert(num_histograms <= 256)
if num_histograms <= 1 {
for i = 0; i < length; i++ {
block_id[i] = 0
}
return 1
}
for i := 0; i < int(data_size*num_histograms); i++ {
insert_cost[i] = 0
}
/* Row 0 temporarily holds fastLog2 of each histogram's total count. */
for i = 0; i < num_histograms; i++ {
insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
}
/* Fill insert_cost[symbol*num_histograms + code] from the highest symbol down
to symbol 0, so that row 0 (the temporary totals read as insert_cost[j]) is
overwritten last. */
for i = data_size; i != 0; {
i--
for j = 0; j < num_histograms; j++ {
insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
}
}
for i := 0; i < int(num_histograms); i++ {
cost[i] = 0
}
for i := 0; i < int(length*bitmaplen); i++ {
switch_signal[i] = 0
}
/* After each iteration of this loop, cost[k] will contain the difference
between the minimum cost of arriving at the current byte position using
entropy code k, and the minimum cost of arriving at the current byte
position. This difference is capped at the block switch cost, and if it
reaches block switch cost, it means that when we trace back from the last
position, we need to switch here. */
for i = 0; i < length; i++ {
var byte_ix uint = i
var ix uint = byte_ix * bitmaplen
var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
var min_cost float64 = 1e99
var block_switch_cost float64 = block_switch_bitcost
var k uint
for k = 0; k < num_histograms; k++ {
/* We are coding the symbol in data[byte_ix] with entropy code k. */
cost[k] += insert_cost[insert_cost_ix+k]
/* block_id provisionally records the cheapest code at this position. */
if cost[k] < min_cost {
min_cost = cost[k]
block_id[byte_ix] = byte(k)
}
}
/* More blocks for the beginning. */
if byte_ix < 2000 {
block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
}
for k = 0; k < num_histograms; k++ {
cost[k] -= min_cost
if cost[k] >= block_switch_cost {
/* Bit (k & 7) of byte (k >> 3) in this position's bitmap marks code k. */
var mask byte = byte(1 << (k & 7))
cost[k] = block_switch_cost
assert(k>>3 < bitmaplen)
switch_signal[ix+(k>>3)] |= mask
}
}
}
/* Trace back from the last position and switch at the marked places. */
{
var byte_ix uint = length - 1
var ix uint = byte_ix * bitmaplen
var cur_id byte = block_id[byte_ix]
for byte_ix > 0 {
var mask byte = byte(1 << (cur_id & 7))
assert(uint(cur_id)>>3 < bitmaplen)
byte_ix--
ix -= bitmaplen
/* Switch only if the current code was marked at this position and the
cheapest code recorded here is a different one. */
if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
if cur_id != block_id[byte_ix] {
cur_id = block_id[byte_ix]
num_blocks++
}
}
/* Replace the provisional per-position choice with the traced-back id. */
block_id[byte_ix] = cur_id
}
}
return num_blocks
}
/* Marker stored in new_id for histogram ids that have not been encountered
   yet. Block ids are bytes, so 256 is outside the range of real ids. */
var remapBlockIdsDistance_kInvalidId uint16 = 256

/* remapBlockIdsDistance renumbers the ids in block_ids[0..length) so that
   they are dense and ordered by first appearance (the first id seen becomes
   0, the next new one 1, and so on). new_id is scratch space with at least
   num_histograms entries. Returns the number of distinct ids in use. */
func remapBlockIdsDistance(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
	for h := uint(0); h < num_histograms; h++ {
		new_id[h] = remapBlockIdsDistance_kInvalidId
	}

	/* First pass: hand out new ids in order of first appearance. */
	var used uint16
	for pos := uint(0); pos < length; pos++ {
		old := block_ids[pos]
		assert(uint(old) < num_histograms)
		if new_id[old] != remapBlockIdsDistance_kInvalidId {
			continue
		}
		new_id[old] = used
		used++
	}

	/* Second pass: rewrite every element with its new id. */
	for pos := uint(0); pos < length; pos++ {
		renamed := byte(new_id[block_ids[pos]])
		block_ids[pos] = renamed
		assert(uint(renamed) < num_histograms)
	}

	assert(uint(used) <= num_histograms)
	return uint(used)
}
/* buildBlockHistogramsDistance clears the first num_histograms histograms
   and rebuilds them from scratch: symbol data[i] is counted in the histogram
   selected by block_ids[i], for every i in [0, length). */
func buildBlockHistogramsDistance(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramDistance) {
	clearHistogramsDistance(histograms, num_histograms)
	for pos := uint(0); pos < length; pos++ {
		target := &histograms[block_ids[pos]]
		histogramAddDistance(target, uint(data[pos]))
	}
}
/* Marker in new_index for clusters that have not been given a block type yet. */
var clusterBlocksDistance_kInvalidIndex uint32 = math.MaxUint32
/* clusterBlocksDistance turns the per-element block ids into the final block
split stored in split.

Steps:
 1. Runs of equal consecutive ids in block_ids[0..length) become blocks;
    num_blocks must equal the number of such runs (asserted below).
 2. Blocks are processed in batches of histogramsPerBatch: one histogram is
    built per block and each batch is reduced with histogramCombineDistance.
    The surviving histograms are appended to all_histograms.
 3. The batch results are combined again across the whole input, with
    maxNumberOfBlockTypes passed as the cluster limit.
 4. Every block is re-assigned to the remaining cluster with the lowest
    histogramBitCostDistanceDistance, and clusters are numbered in order of
    first use.
 5. Adjacent blocks that ended up with the same cluster are merged and
    written to split.types / split.lengths; split.num_blocks and
    split.num_types are set.

NOTE(review): histogramCombineDistance is not visible in full here; its role is
presumed to match histogramCombineCommand (greedy merging, returns the number
of clusters left) -- confirm. */
func clusterBlocksDistance(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
var histogram_symbols []uint32 = make([]uint32, num_blocks)
var block_lengths []uint32 = make([]uint32, num_blocks)
/* Initial size estimate for all_histograms and cluster_size; both are grown
on demand in the batch loop. */
var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
var all_histograms_size uint = 0
var all_histograms_capacity uint = expected_num_clusters
var all_histograms []histogramDistance = make([]histogramDistance, all_histograms_capacity)
var cluster_size_size uint = 0
var cluster_size_capacity uint = expected_num_clusters
var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
var num_clusters uint = 0
var histograms []histogramDistance = make([]histogramDistance, brotli_min_size_t(num_blocks, histogramsPerBatch))
var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
var pairs_capacity uint = max_num_pairs + 1
var pairs []histogramPair = make([]histogramPair, pairs_capacity)
var pos uint = 0
var clusters []uint32
var num_final_clusters uint
var new_index []uint32
var i uint
/* Per-batch scratch arrays, indexed by position within the batch. */
var sizes = [histogramsPerBatch]uint32{0}
var new_clusters = [histogramsPerBatch]uint32{0}
var symbols = [histogramsPerBatch]uint32{0}
var remap = [histogramsPerBatch]uint32{0}
/* make already returns zeroed memory, so this loop is redundant but harmless.
Note that it uses its own int-typed i, shadowing the uint i above. */
for i := 0; i < int(num_blocks); i++ {
block_lengths[i] = 0
}
/* Step 1: measure the length of each run of equal block ids. */
{
var block_idx uint = 0
for i = 0; i < length; i++ {
assert(block_idx < num_blocks)
block_lengths[block_idx]++
if i+1 == length || block_ids[i] != block_ids[i+1] {
block_idx++
}
}
assert(block_idx == num_blocks)
}
/* Step 2: cluster the blocks batch by batch. pos walks through data once,
in step with the block lengths. */
for i = 0; i < num_blocks; i += histogramsPerBatch {
var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
var num_new_clusters uint
var j uint
for j = 0; j < num_to_combine; j++ {
var k uint
histogramClearDistance(&histograms[j])
for k = 0; uint32(k) < block_lengths[i+j]; k++ {
histogramAddDistance(&histograms[j], uint(data[pos]))
pos++
}
histograms[j].bit_cost_ = populationCostDistance(&histograms[j])
/* Every block starts out as its own cluster of size 1. */
new_clusters[j] = uint32(j)
symbols[j] = uint32(j)
sizes[j] = 1
}
num_new_clusters = histogramCombineDistance(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
/* Grow all_histograms by doubling until the new clusters fit, keeping the
existing contents. */
if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
var _new_size uint
if all_histograms_capacity == 0 {
_new_size = all_histograms_size + num_new_clusters
} else {
_new_size = all_histograms_capacity
}
var new_array []histogramDistance
for _new_size < (all_histograms_size + num_new_clusters) {
_new_size *= 2
}
new_array = make([]histogramDistance, _new_size)
if all_histograms_capacity != 0 {
copy(new_array, all_histograms[:all_histograms_capacity])
}
all_histograms = new_array
all_histograms_capacity = _new_size
}
brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
/* Append the surviving batch clusters and remember, per batch-local cluster
index, its position among this batch's survivors. */
for j = 0; j < num_new_clusters; j++ {
all_histograms[all_histograms_size] = histograms[new_clusters[j]]
all_histograms_size++
cluster_size[cluster_size_size] = sizes[new_clusters[j]]
cluster_size_size++
remap[new_clusters[j]] = uint32(j)
}
/* Translate batch-local symbols into indices into all_histograms. */
for j = 0; j < num_to_combine; j++ {
histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
}
num_clusters += num_new_clusters
assert(num_clusters == cluster_size_size)
assert(num_clusters == all_histograms_size)
}
histograms = nil
/* Step 3: combine the batch results across the whole input. The pair queue
limit is recomputed for the global cluster count and the queue is reallocated
only if the existing one is too small. */
max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
if pairs_capacity < max_num_pairs+1 {
pairs = nil
pairs = make([]histogramPair, (max_num_pairs + 1))
}
clusters = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
clusters[i] = uint32(i)
}
num_final_clusters = histogramCombineDistance(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
pairs = nil
cluster_size = nil
new_index = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
new_index[i] = clusterBlocksDistance_kInvalidIndex
}
pos = 0
/* Step 4: rebuild each block's histogram and pick the cheapest final cluster
for it. */
{
var next_index uint32 = 0
for i = 0; i < num_blocks; i++ {
var histo histogramDistance
var j uint
var best_out uint32
var best_bits float64
histogramClearDistance(&histo)
for j = 0; uint32(j) < block_lengths[i]; j++ {
histogramAddDistance(&histo, uint(data[pos]))
pos++
}
/* Start from the cluster chosen for the previous block (already updated in
the previous iteration); block 0 starts from its own current symbol. Another
cluster wins only if it is strictly cheaper. */
if i == 0 {
best_out = histogram_symbols[0]
} else {
best_out = histogram_symbols[i-1]
}
best_bits = histogramBitCostDistanceDistance(&histo, &all_histograms[best_out])
for j = 0; j < num_final_clusters; j++ {
var cur_bits float64 = histogramBitCostDistanceDistance(&histo, &all_histograms[clusters[j]])
if cur_bits < best_bits {
best_bits = cur_bits
best_out = clusters[j]
}
}
histogram_symbols[i] = best_out
/* Clusters receive consecutive indices in order of first use. */
if new_index[best_out] == clusterBlocksDistance_kInvalidIndex {
new_index[best_out] = next_index
next_index++
}
}
}
clusters = nil
all_histograms = nil
/* Step 5: emit the split, merging neighbouring blocks that share a cluster. */
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
{
var cur_length uint32 = 0
var block_idx uint = 0
var max_type byte = 0
for i = 0; i < num_blocks; i++ {
cur_length += block_lengths[i]
if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
/* NOTE(review): the byte conversion assumes fewer than 257 distinct clusters
are in use -- presumably guaranteed by maxNumberOfBlockTypes; confirm. */
var id byte = byte(new_index[histogram_symbols[i]])
split.types[block_idx] = id
split.lengths[block_idx] = cur_length
max_type = brotli_max_uint8_t(max_type, id)
cur_length = 0
block_idx++
}
}
split.num_blocks = block_idx
split.num_types = uint(max_type) + 1
}
new_index = nil
block_lengths = nil
histogram_symbols = nil
}
/* splitByteVectorDistance computes a block split for the first length
   distance symbols in data and writes it into split.

   - length == 0: only split.num_types is set to 1.
   - length < kMinLengthForBlockSplitting: a single block of type 0 covering
     the whole input is appended to split.
   - Otherwise: entropy codes are seeded and refined by sampling, then
     findBlocks/remap/rebuild passes are run (3 when params.quality is below
     hqZopflificationQuality, 10 otherwise) and the resulting blocks are
     handed to clusterBlocksDistance, which fills in split. */
func splitByteVectorDistance(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
	alphabet := histogramDataSizeDistance()

	/* One histogram per literals_per_histogram symbols, capped by the caller. */
	codes := length/literals_per_histogram + 1
	if codes > max_histograms {
		codes = max_histograms
	}

	switch {
	case length == 0:
		split.num_types = 1
		return
	case length < kMinLengthForBlockSplitting:
		tail := split.num_blocks
		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, tail+1)
		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, tail+1)
		split.num_types = 1
		split.types[tail] = 0
		split.lengths[tail] = uint32(length)
		split.num_blocks = tail + 1
		return
	}

	/* Find good entropy codes. */
	histos := make([]histogramDistance, codes)
	initialEntropyCodesDistance(data, length, sampling_stride_length, codes, histos)
	refineEntropyCodesDistance(data, length, sampling_stride_length, codes, histos)

	/* Scratch buffers shared by every pass below; they are sized for the
	   initial histogram count, which the passes never increase. */
	ids := make([]byte, length)
	signalBytes := (codes + 7) >> 3
	symbolCosts := make([]float64, alphabet*codes)
	pathCosts := make([]float64, codes)
	switches := make([]byte, length*signalBytes)
	renumber := make([]uint16, codes)

	passes := uint(10)
	if params.quality < hqZopflificationQuality {
		passes = 3
	}

	/* Find a good path through the symbols with the good entropy codes. */
	var blocks uint
	for pass := uint(0); pass < passes; pass++ {
		blocks = findBlocksDistance(data, length, block_switch_cost, codes, histos, symbolCosts, pathCosts, switches, ids)
		codes = remapBlockIdsDistance(ids, length, renumber, codes)
		buildBlockHistogramsDistance(data, length, ids, codes, histos)
	}

	clusterBlocksDistance(data, length, blocks, ids, split)
}
+433
View File
@@ -0,0 +1,433 @@
package brotli
import "math"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* initialEntropyCodesLiteral seeds each of the num_histograms histograms
   with one window of stride bytes from data.

   Window i starts at length*i/num_histograms; every window except the first
   is shifted by a pseudo-random offset smaller than length/num_histograms
   (fixed seed 7, so the result is deterministic). A window that would reach
   the end of the input is moved back to start at length-stride-1. */
func initialEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
	rng := uint32(7)
	span := length / num_histograms

	clearHistogramsLiteral(histograms, num_histograms)
	for h := uint(0); h < num_histograms; h++ {
		start := length * h / num_histograms
		if h > 0 {
			start += uint(myRand(&rng) % uint32(span))
		}
		if start+stride >= length {
			start = length - stride - 1
		}
		histogramAddVectorLiteral(&histograms[h], data[start:], stride)
	}
}
/* randomSampleLiteral adds one window of bytes from data to sample.

   When stride covers the whole input (stride >= length) the window is the
   first length bytes. Otherwise the window is stride bytes long and starts
   at a pseudo-random position in [0, length-stride], drawn from seed. */
func randomSampleLiteral(seed *uint32, data []byte, length uint, stride uint, sample *histogramLiteral) {
	if stride >= length {
		histogramAddVectorLiteral(sample, data[0:], length)
		return
	}

	offset := uint(myRand(seed) % uint32(length-stride+1))
	histogramAddVectorLiteral(sample, data[offset:], stride)
}
/* refineEntropyCodesLiteral refines the histograms by adding random windows
   of the input to them in round-robin order.

   The number of windows is kIterMulForRefining*length/stride +
   kMinItersForRefining, rounded up to a multiple of num_histograms so every
   histogram receives the same number of samples. The generator uses the
   fixed seed 7, so the result is deterministic. */
func refineEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
	var rounds uint = kIterMulForRefining*length/stride + kMinItersForRefining
	rounds = ((rounds + num_histograms - 1) / num_histograms) * num_histograms

	rng := uint32(7)
	for round := uint(0); round < rounds; round++ {
		var draw histogramLiteral
		histogramClearLiteral(&draw)
		randomSampleLiteral(&rng, data, length, stride, &draw)
		histogramAddHistogramLiteral(&histograms[round%num_histograms], &draw)
	}
}
/* Assigns a block id from the range [0, num_histograms) to each data element
in data[0..length) and fills in block_id[0..length) with the assigned values.
Returns the number of blocks, i.e. one plus the number of block switches.

insert_cost, cost and switch_signal are caller-provided scratch buffers. They
are reset here over the ranges that are used, which the indexing below requires
to be at least data_size*num_histograms, num_histograms and length*bitmaplen
elements respectively (bitmaplen = (num_histograms+7)>>3).

With num_histograms <= 1 every element gets block id 0 and the scratch buffers
are left untouched. */
func findBlocksLiteral(data []byte, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramLiteral, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
var data_size uint = histogramDataSizeLiteral()
/* Bytes of switch_signal per position: one bit per histogram, rounded up. */
var bitmaplen uint = (num_histograms + 7) >> 3
var num_blocks uint = 1
var i uint
var j uint
/* Block ids are stored in a byte, so more than 256 histograms cannot be
represented. */
assert(num_histograms <= 256)
if num_histograms <= 1 {
for i = 0; i < length; i++ {
block_id[i] = 0
}
return 1
}
for i := 0; i < int(data_size*num_histograms); i++ {
insert_cost[i] = 0
}
/* Row 0 temporarily holds fastLog2 of each histogram's total count. */
for i = 0; i < num_histograms; i++ {
insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
}
/* Fill insert_cost[symbol*num_histograms + code] from the highest symbol down
to symbol 0, so that row 0 (the temporary totals read as insert_cost[j]) is
overwritten last. */
for i = data_size; i != 0; {
i--
for j = 0; j < num_histograms; j++ {
insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
}
}
for i := 0; i < int(num_histograms); i++ {
cost[i] = 0
}
for i := 0; i < int(length*bitmaplen); i++ {
switch_signal[i] = 0
}
/* After each iteration of this loop, cost[k] will contain the difference
between the minimum cost of arriving at the current byte position using
entropy code k, and the minimum cost of arriving at the current byte
position. This difference is capped at the block switch cost, and if it
reaches block switch cost, it means that when we trace back from the last
position, we need to switch here. */
for i = 0; i < length; i++ {
var byte_ix uint = i
var ix uint = byte_ix * bitmaplen
var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
var min_cost float64 = 1e99
var block_switch_cost float64 = block_switch_bitcost
var k uint
for k = 0; k < num_histograms; k++ {
/* We are coding the symbol in data[byte_ix] with entropy code k. */
cost[k] += insert_cost[insert_cost_ix+k]
/* block_id provisionally records the cheapest code at this position. */
if cost[k] < min_cost {
min_cost = cost[k]
block_id[byte_ix] = byte(k)
}
}
/* More blocks for the beginning. */
if byte_ix < 2000 {
block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
}
for k = 0; k < num_histograms; k++ {
cost[k] -= min_cost
if cost[k] >= block_switch_cost {
/* Bit (k & 7) of byte (k >> 3) in this position's bitmap marks code k. */
var mask byte = byte(1 << (k & 7))
cost[k] = block_switch_cost
assert(k>>3 < bitmaplen)
switch_signal[ix+(k>>3)] |= mask
}
}
}
/* Trace back from the last position and switch at the marked places. */
{
var byte_ix uint = length - 1
var ix uint = byte_ix * bitmaplen
var cur_id byte = block_id[byte_ix]
for byte_ix > 0 {
var mask byte = byte(1 << (cur_id & 7))
assert(uint(cur_id)>>3 < bitmaplen)
byte_ix--
ix -= bitmaplen
/* Switch only if the current code was marked at this position and the
cheapest code recorded here is a different one. */
if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
if cur_id != block_id[byte_ix] {
cur_id = block_id[byte_ix]
num_blocks++
}
}
/* Replace the provisional per-position choice with the traced-back id. */
block_id[byte_ix] = cur_id
}
}
return num_blocks
}
/* Marker stored in new_id for histogram ids that have not been encountered
   yet. Block ids are bytes, so 256 is outside the range of real ids. */
var remapBlockIdsLiteral_kInvalidId uint16 = 256

/* remapBlockIdsLiteral renumbers the ids in block_ids[0..length) so that
   they are dense and ordered by first appearance (the first id seen becomes
   0, the next new one 1, and so on). new_id is scratch space with at least
   num_histograms entries. Returns the number of distinct ids in use. */
func remapBlockIdsLiteral(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
	for h := uint(0); h < num_histograms; h++ {
		new_id[h] = remapBlockIdsLiteral_kInvalidId
	}

	/* First pass: hand out new ids in order of first appearance. */
	var used uint16
	for pos := uint(0); pos < length; pos++ {
		old := block_ids[pos]
		assert(uint(old) < num_histograms)
		if new_id[old] != remapBlockIdsLiteral_kInvalidId {
			continue
		}
		new_id[old] = used
		used++
	}

	/* Second pass: rewrite every element with its new id. */
	for pos := uint(0); pos < length; pos++ {
		renamed := byte(new_id[block_ids[pos]])
		block_ids[pos] = renamed
		assert(uint(renamed) < num_histograms)
	}

	assert(uint(used) <= num_histograms)
	return uint(used)
}
/* buildBlockHistogramsLiteral clears the first num_histograms histograms and
   rebuilds them from scratch: byte data[i] is counted in the histogram
   selected by block_ids[i], for every i in [0, length). */
func buildBlockHistogramsLiteral(data []byte, length uint, block_ids []byte, num_histograms uint, histograms []histogramLiteral) {
	clearHistogramsLiteral(histograms, num_histograms)
	for pos := uint(0); pos < length; pos++ {
		target := &histograms[block_ids[pos]]
		histogramAddLiteral(target, uint(data[pos]))
	}
}
/* Marker in new_index for clusters that have not been given a block type yet. */
var clusterBlocksLiteral_kInvalidIndex uint32 = math.MaxUint32
/* clusterBlocksLiteral turns the per-element block ids into the final block
split stored in split.

Steps:
 1. Runs of equal consecutive ids in block_ids[0..length) become blocks;
    num_blocks must equal the number of such runs (asserted below).
 2. Blocks are processed in batches of histogramsPerBatch: one histogram is
    built per block and each batch is reduced with histogramCombineLiteral.
    The surviving histograms are appended to all_histograms.
 3. The batch results are combined again across the whole input, with
    maxNumberOfBlockTypes passed as the cluster limit.
 4. Every block is re-assigned to the remaining cluster with the lowest
    histogramBitCostDistanceLiteral, and clusters are numbered in order of
    first use.
 5. Adjacent blocks that ended up with the same cluster are merged and
    written to split.types / split.lengths; split.num_blocks and
    split.num_types are set.

NOTE(review): histogramCombineLiteral is not visible here; its role is presumed
to match histogramCombineCommand (greedy merging, returns the number of
clusters left) -- confirm. */
func clusterBlocksLiteral(data []byte, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
var histogram_symbols []uint32 = make([]uint32, num_blocks)
var block_lengths []uint32 = make([]uint32, num_blocks)
/* Initial size estimate for all_histograms and cluster_size; both are grown
on demand in the batch loop. */
var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
var all_histograms_size uint = 0
var all_histograms_capacity uint = expected_num_clusters
var all_histograms []histogramLiteral = make([]histogramLiteral, all_histograms_capacity)
var cluster_size_size uint = 0
var cluster_size_capacity uint = expected_num_clusters
var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
var num_clusters uint = 0
var histograms []histogramLiteral = make([]histogramLiteral, brotli_min_size_t(num_blocks, histogramsPerBatch))
var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
var pairs_capacity uint = max_num_pairs + 1
var pairs []histogramPair = make([]histogramPair, pairs_capacity)
var pos uint = 0
var clusters []uint32
var num_final_clusters uint
var new_index []uint32
var i uint
/* Per-batch scratch arrays, indexed by position within the batch. */
var sizes = [histogramsPerBatch]uint32{0}
var new_clusters = [histogramsPerBatch]uint32{0}
var symbols = [histogramsPerBatch]uint32{0}
var remap = [histogramsPerBatch]uint32{0}
/* make already returns zeroed memory, so this loop is redundant but harmless.
Note that it uses its own int-typed i, shadowing the uint i above. */
for i := 0; i < int(num_blocks); i++ {
block_lengths[i] = 0
}
/* Step 1: measure the length of each run of equal block ids. */
{
var block_idx uint = 0
for i = 0; i < length; i++ {
assert(block_idx < num_blocks)
block_lengths[block_idx]++
if i+1 == length || block_ids[i] != block_ids[i+1] {
block_idx++
}
}
assert(block_idx == num_blocks)
}
/* Step 2: cluster the blocks batch by batch. pos walks through data once,
in step with the block lengths. */
for i = 0; i < num_blocks; i += histogramsPerBatch {
var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
var num_new_clusters uint
var j uint
for j = 0; j < num_to_combine; j++ {
var k uint
histogramClearLiteral(&histograms[j])
for k = 0; uint32(k) < block_lengths[i+j]; k++ {
histogramAddLiteral(&histograms[j], uint(data[pos]))
pos++
}
histograms[j].bit_cost_ = populationCostLiteral(&histograms[j])
/* Every block starts out as its own cluster of size 1. */
new_clusters[j] = uint32(j)
symbols[j] = uint32(j)
sizes[j] = 1
}
num_new_clusters = histogramCombineLiteral(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
/* Grow all_histograms by doubling until the new clusters fit, keeping the
existing contents. */
if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
var _new_size uint
if all_histograms_capacity == 0 {
_new_size = all_histograms_size + num_new_clusters
} else {
_new_size = all_histograms_capacity
}
var new_array []histogramLiteral
for _new_size < (all_histograms_size + num_new_clusters) {
_new_size *= 2
}
new_array = make([]histogramLiteral, _new_size)
if all_histograms_capacity != 0 {
copy(new_array, all_histograms[:all_histograms_capacity])
}
all_histograms = new_array
all_histograms_capacity = _new_size
}
brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
/* Append the surviving batch clusters and remember, per batch-local cluster
index, its position among this batch's survivors. */
for j = 0; j < num_new_clusters; j++ {
all_histograms[all_histograms_size] = histograms[new_clusters[j]]
all_histograms_size++
cluster_size[cluster_size_size] = sizes[new_clusters[j]]
cluster_size_size++
remap[new_clusters[j]] = uint32(j)
}
/* Translate batch-local symbols into indices into all_histograms. */
for j = 0; j < num_to_combine; j++ {
histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
}
num_clusters += num_new_clusters
assert(num_clusters == cluster_size_size)
assert(num_clusters == all_histograms_size)
}
histograms = nil
/* Step 3: combine the batch results across the whole input. The pair queue
limit is recomputed for the global cluster count and the queue is reallocated
only if the existing one is too small. */
max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
if pairs_capacity < max_num_pairs+1 {
pairs = nil
pairs = make([]histogramPair, (max_num_pairs + 1))
}
clusters = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
clusters[i] = uint32(i)
}
num_final_clusters = histogramCombineLiteral(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
pairs = nil
cluster_size = nil
new_index = make([]uint32, num_clusters)
for i = 0; i < num_clusters; i++ {
new_index[i] = clusterBlocksLiteral_kInvalidIndex
}
pos = 0
/* Step 4: rebuild each block's histogram and pick the cheapest final cluster
for it. */
{
var next_index uint32 = 0
for i = 0; i < num_blocks; i++ {
var histo histogramLiteral
var j uint
var best_out uint32
var best_bits float64
histogramClearLiteral(&histo)
for j = 0; uint32(j) < block_lengths[i]; j++ {
histogramAddLiteral(&histo, uint(data[pos]))
pos++
}
/* Start from the cluster chosen for the previous block (already updated in
the previous iteration); block 0 starts from its own current symbol. Another
cluster wins only if it is strictly cheaper. */
if i == 0 {
best_out = histogram_symbols[0]
} else {
best_out = histogram_symbols[i-1]
}
best_bits = histogramBitCostDistanceLiteral(&histo, &all_histograms[best_out])
for j = 0; j < num_final_clusters; j++ {
var cur_bits float64 = histogramBitCostDistanceLiteral(&histo, &all_histograms[clusters[j]])
if cur_bits < best_bits {
best_bits = cur_bits
best_out = clusters[j]
}
}
histogram_symbols[i] = best_out
/* Clusters receive consecutive indices in order of first use. */
if new_index[best_out] == clusterBlocksLiteral_kInvalidIndex {
new_index[best_out] = next_index
next_index++
}
}
}
clusters = nil
all_histograms = nil
/* Step 5: emit the split, merging neighbouring blocks that share a cluster. */
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
{
var cur_length uint32 = 0
var block_idx uint = 0
var max_type byte = 0
for i = 0; i < num_blocks; i++ {
cur_length += block_lengths[i]
if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
/* NOTE(review): the byte conversion assumes fewer than 257 distinct clusters
are in use -- presumably guaranteed by maxNumberOfBlockTypes; confirm. */
var id byte = byte(new_index[histogram_symbols[i]])
split.types[block_idx] = id
split.lengths[block_idx] = cur_length
max_type = brotli_max_uint8_t(max_type, id)
cur_length = 0
block_idx++
}
}
split.num_blocks = block_idx
split.num_types = uint(max_type) + 1
}
new_index = nil
block_lengths = nil
histogram_symbols = nil
}
/* splitByteVectorLiteral computes a block split for the first length literal
   bytes in data and writes it into split.

   - length == 0: only split.num_types is set to 1.
   - length < kMinLengthForBlockSplitting: a single block of type 0 covering
     the whole input is appended to split.
   - Otherwise: entropy codes are seeded and refined by sampling, then
     findBlocks/remap/rebuild passes are run (3 when params.quality is below
     hqZopflificationQuality, 10 otherwise) and the resulting blocks are
     handed to clusterBlocksLiteral, which fills in split. */
func splitByteVectorLiteral(data []byte, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
	alphabet := histogramDataSizeLiteral()

	/* One histogram per literals_per_histogram bytes, capped by the caller. */
	codes := length/literals_per_histogram + 1
	if codes > max_histograms {
		codes = max_histograms
	}

	switch {
	case length == 0:
		split.num_types = 1
		return
	case length < kMinLengthForBlockSplitting:
		tail := split.num_blocks
		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, tail+1)
		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, tail+1)
		split.num_types = 1
		split.types[tail] = 0
		split.lengths[tail] = uint32(length)
		split.num_blocks = tail + 1
		return
	}

	/* Find good entropy codes. */
	histos := make([]histogramLiteral, codes)
	initialEntropyCodesLiteral(data, length, sampling_stride_length, codes, histos)
	refineEntropyCodesLiteral(data, length, sampling_stride_length, codes, histos)

	/* Scratch buffers shared by every pass below; they are sized for the
	   initial histogram count, which the passes never increase. */
	ids := make([]byte, length)
	signalBytes := (codes + 7) >> 3
	symbolCosts := make([]float64, alphabet*codes)
	pathCosts := make([]float64, codes)
	switches := make([]byte, length*signalBytes)
	renumber := make([]uint16, codes)

	passes := uint(10)
	if params.quality < hqZopflificationQuality {
		passes = 3
	}

	/* Find a good path through literals with the good entropy codes. */
	var blocks uint
	for pass := uint(0); pass < passes; pass++ {
		blocks = findBlocksLiteral(data, length, block_switch_cost, codes, histos, symbolCosts, pathCosts, switches, ids)
		codes = remapBlockIdsLiteral(ids, length, renumber, codes)
		buildBlockHistogramsLiteral(data, length, ids, codes, histos)
	}

	clusterBlocksLiteral(data, length, blocks, ids, split)
}
File diff suppressed because it is too large Load Diff
+30
View File
@@ -0,0 +1,30 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for clustering similar histograms together. */
/* histogramPair is a candidate merge of two histogram clusters, as kept in
   the pair queue used while clustering. */
type histogramPair struct {
	/* Indices of the two clusters; producers in this package store the
	   smaller index in idx1. */
	idx1, idx2 uint32
	/* cost_combo is the bit cost of the merged histogram; cost_diff is the
	   change in total cost if the merge is performed (lower is better). */
	cost_combo, cost_diff float64
}
/* histogramPairIsLess reports whether p1 is a worse merge candidate than p2:
   p1 has the larger cost_diff, or, when the cost_diff values are equal, the
   larger distance between its two indices. */
func histogramPairIsLess(p1 *histogramPair, p2 *histogramPair) bool {
	if p1.cost_diff == p2.cost_diff {
		/* Tie-break on the index distance (unsigned arithmetic). */
		return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1)
	}
	return p1.cost_diff > p2.cost_diff
}
/* clusterCostDiff returns the entropy reduction of the context map when two
   clusters of sizes size_a and size_b are combined:
   a*log2(a) + b*log2(b) - (a+b)*log2(a+b), with log2 taken by fastLog2. */
func clusterCostDiff(size_a uint, size_b uint) float64 {
	merged := size_a + size_b
	return float64(size_a)*fastLog2(size_a) + float64(size_b)*fastLog2(size_b) - float64(merged)*fastLog2(merged)
}
+164
View File
@@ -0,0 +1,164 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* compareAndPushToQueueCommand evaluates merging out[idx1] with out[idx2]
   and, if the merge is good enough, records it in the pairs queue.

   The queue keeps its best pair at pairs[0]; other entries are unordered.
   *num_pairs is the number of entries in use and never exceeds
   max_num_pairs. A pair with idx1 == idx2 is ignored. If either histogram is
   empty the pair is always accepted; otherwise it is accepted only when the
   merged cost beats a threshold derived from the current best pair. */
func compareAndPushToQueueCommand(out []histogramCommand, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
	if idx1 == idx2 {
		return
	}
	/* Normalize so that idx1 < idx2. */
	if idx2 < idx1 {
		idx1, idx2 = idx2, idx1
	}

	var cand histogramPair
	cand.idx1 = idx1
	cand.idx2 = idx2
	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[idx1]), uint(cluster_size[idx2]))
	cand.cost_diff -= out[idx1].bit_cost_
	cand.cost_diff -= out[idx2].bit_cost_

	accept := false
	switch {
	case out[idx1].total_count_ == 0:
		/* Merging with an empty histogram costs exactly the other one. */
		cand.cost_combo = out[idx2].bit_cost_
		accept = true
	case out[idx2].total_count_ == 0:
		cand.cost_combo = out[idx1].bit_cost_
		accept = true
	default:
		limit := 1e99
		if *num_pairs != 0 {
			limit = brotli_max_double(0.0, pairs[0].cost_diff)
		}
		merged := out[idx1]
		histogramAddHistogramCommand(&merged, &out[idx2])
		mergedCost := populationCostCommand(&merged)
		if mergedCost < limit-cand.cost_diff {
			cand.cost_combo = mergedCost
			accept = true
		}
	}
	if !accept {
		return
	}

	cand.cost_diff += cand.cost_combo
	if *num_pairs > 0 && histogramPairIsLess(&pairs[0], &cand) {
		/* The candidate becomes the new front; the old front moves to the
		   back if there is room, otherwise it is dropped. */
		if *num_pairs < max_num_pairs {
			pairs[*num_pairs] = pairs[0]
			(*num_pairs)++
		}
		pairs[0] = cand
	} else if *num_pairs < max_num_pairs {
		pairs[*num_pairs] = cand
		(*num_pairs)++
	}
}
/* histogramCombineCommand greedily merges histogram clusters in place and
returns the number of clusters that remain.

out holds the histograms and cluster_size their sizes, both indexed by cluster
id. clusters[0..num_clusters) lists the ids of the live clusters, and
symbols[0..symbols_size) maps each symbol to a cluster id. pairs is scratch
space for the candidate queue, holding at most max_num_pairs entries in use.

Merging happens in two phases. First, pairs are merged while the best pair has
a negative cost_diff. Once it does not, the threshold is raised to 1e99 and
merging continues only while more than max_clusters clusters remain.

NOTE(review): pairs[0] is read at the top of every iteration even when the
queue is empty, so pairs must have at least one element -- confirm callers. */
func histogramCombineCommand(out []histogramCommand, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
var cost_diff_threshold float64 = 0.0
var min_cluster_size uint = 1
var num_pairs uint = 0
{
/* We maintain a vector of histogram pairs, with the property that the pair
with the maximum bit cost reduction is the first. */
var idx1 uint
for idx1 = 0; idx1 < num_clusters; idx1++ {
var idx2 uint
for idx2 = idx1 + 1; idx2 < num_clusters; idx2++ {
compareAndPushToQueueCommand(out, cluster_size, clusters[idx1], clusters[idx2], max_num_pairs, pairs[0:], &num_pairs)
}
}
}
for num_clusters > min_cluster_size {
var best_idx1 uint32
var best_idx2 uint32
var i uint
/* No remaining merge clears the current threshold: switch to the second
phase, which merges unconditionally but stops at max_clusters. */
if pairs[0].cost_diff >= cost_diff_threshold {
cost_diff_threshold = 1e99
min_cluster_size = max_clusters
continue
}
/* Take the best pair from the top of heap. */
best_idx1 = pairs[0].idx1
best_idx2 = pairs[0].idx2
/* Fold cluster best_idx2 into best_idx1. */
histogramAddHistogramCommand(&out[best_idx1], &out[best_idx2])
out[best_idx1].bit_cost_ = pairs[0].cost_combo
cluster_size[best_idx1] += cluster_size[best_idx2]
for i = 0; i < symbols_size; i++ {
if symbols[i] == best_idx2 {
symbols[i] = best_idx1
}
}
/* Remove best_idx2 from the live cluster list, shifting the tail down. */
for i = 0; i < num_clusters; i++ {
if clusters[i] == best_idx2 {
copy(clusters[i:], clusters[i+1:][:num_clusters-i-1])
break
}
}
num_clusters--
{
/* Remove pairs intersecting the just combined best pair. */
/* The queue is compacted in place; copy_to_idx is the write position and
never runs ahead of the read position i. */
var copy_to_idx uint = 0
for i = 0; i < num_pairs; i++ {
var p *histogramPair = &pairs[i]
if p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 || p.idx2 == best_idx2 {
/* Remove invalid pair from the queue. */
continue
}
if histogramPairIsLess(&pairs[0], p) {
/* Replace the top of the queue if needed. */
var front histogramPair = pairs[0]
pairs[0] = *p
pairs[copy_to_idx] = front
} else {
pairs[copy_to_idx] = *p
}
copy_to_idx++
}
num_pairs = copy_to_idx
}
/* Push new pairs formed with the combined histogram to the heap. */
for i = 0; i < num_clusters; i++ {
compareAndPushToQueueCommand(out, cluster_size, best_idx1, clusters[i], max_num_pairs, pairs[0:], &num_pairs)
}
}
return num_clusters
}
/* histogramBitCostDistanceCommand returns the bit cost of moving histogram
   into candidate: the population cost of the two combined minus
   candidate.bit_cost_. An empty histogram costs 0. Neither argument is
   modified; the combination is built on a copy of histogram. */
func histogramBitCostDistanceCommand(histogram *histogramCommand, candidate *histogramCommand) float64 {
	if histogram.total_count_ == 0 {
		return 0.0
	}

	merged := *histogram
	histogramAddHistogramCommand(&merged, candidate)
	return populationCostCommand(&merged) - candidate.bit_cost_
}
+326
View File
@@ -0,0 +1,326 @@
package brotli
import "math"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Evaluates merging out[idx1] with out[idx2] and, when the merge is cheap
   enough, records the pair in the pairs queue.

   The queue keeps its best pair (per histogramPairIsLess) at pairs[0]; the
   remaining entries are in no particular order. *num_pairs is the number of
   queued pairs and never grows beyond max_num_pairs. Once the queue is full
   a new pair is only stored if it replaces pairs[0]. Nothing happens when
   idx1 == idx2. */
func compareAndPushToQueueDistance(out []histogramDistance, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
	if idx1 == idx2 {
		return
	}

	/* Store pairs with the smaller index first. */
	lo, hi := idx1, idx2
	if hi < lo {
		lo, hi = hi, lo
	}

	var cand histogramPair
	cand.idx1 = lo
	cand.idx2 = hi
	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[lo]), uint(cluster_size[hi]))
	cand.cost_diff -= out[lo].bit_cost_
	cand.cost_diff -= out[hi].bit_cost_

	switch {
	case out[lo].total_count_ == 0:
		/* Merging with an empty histogram costs what the other one costs. */
		cand.cost_combo = out[hi].bit_cost_
	case out[hi].total_count_ == 0:
		cand.cost_combo = out[lo].bit_cost_
	default:
		limit := 1e99
		if *num_pairs != 0 {
			limit = brotli_max_double(0.0, pairs[0].cost_diff)
		}
		merged := out[lo]
		histogramAddHistogramDistance(&merged, &out[hi])
		mergedCost := populationCostDistance(&merged)
		if !(mergedCost < limit-cand.cost_diff) {
			/* Not good enough to be worth queueing. */
			return
		}
		cand.cost_combo = mergedCost
	}

	cand.cost_diff += cand.cost_combo

	queued := *num_pairs
	if queued > 0 && histogramPairIsLess(&pairs[0], &cand) {
		/* The candidate becomes the new front; keep the old front if there
		   is still room for it. */
		if queued < max_num_pairs {
			pairs[queued] = pairs[0]
			*num_pairs = queued + 1
		}
		pairs[0] = cand
		return
	}
	if queued < max_num_pairs {
		pairs[queued] = cand
		*num_pairs = queued + 1
	}
}
/* Greedily merges the histograms listed in clusters[0..num_clusters) and
   returns how many clusters remain.

   Each round takes the pair at pairs[0], adds the second histogram into the
   first, rewrites every symbols[0..symbols_size) entry that named the second
   one, removes it from clusters, and refreshes the pair queue. Merging goes
   on while the front pair has a negative cost_diff; after that it continues
   unconditionally until at most max_clusters clusters are left. */
func histogramCombineDistance(out []histogramDistance, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
	var (
		threshold float64 = 0.0
		floor     uint    = 1
		queued    uint    = 0
	)

	/* Seed the queue with every pair of clusters. The queue keeps the pair
	   with the maximum bit cost reduction at index 0. */
	for a := uint(0); a < num_clusters; a++ {
		for b := a + 1; b < num_clusters; b++ {
			compareAndPushToQueueDistance(out, cluster_size, clusters[a], clusters[b], max_num_pairs, pairs, &queued)
		}
	}

	for num_clusters > floor {
		if pairs[0].cost_diff >= threshold {
			/* No more beneficial merges: from now on accept any merge, but
			   stop as soon as max_clusters is reached. */
			threshold = 1e99
			floor = max_clusters
			continue
		}

		/* Take the best pair from the front of the queue. */
		keep := pairs[0].idx1
		drop := pairs[0].idx2
		histogramAddHistogramDistance(&out[keep], &out[drop])
		out[keep].bit_cost_ = pairs[0].cost_combo
		cluster_size[keep] += cluster_size[drop]

		for k := uint(0); k < symbols_size; k++ {
			if symbols[k] == drop {
				symbols[k] = keep
			}
		}

		/* Delete the absorbed cluster, shifting the tail down by one. */
		for k := uint(0); k < num_clusters; k++ {
			if clusters[k] != drop {
				continue
			}
			copy(clusters[k:], clusters[k+1:num_clusters])
			break
		}
		num_clusters--

		/* Compact the queue in place, dropping every pair that touches
		   either of the two merged histograms. */
		kept := uint(0)
		for k := uint(0); k < queued; k++ {
			cur := pairs[k]
			if cur.idx1 == keep || cur.idx2 == keep || cur.idx1 == drop || cur.idx2 == drop {
				continue
			}
			if histogramPairIsLess(&pairs[0], &cur) {
				/* cur beats the current front: swap them. */
				front := pairs[0]
				pairs[0] = cur
				pairs[kept] = front
			} else {
				pairs[kept] = cur
			}
			kept++
		}
		queued = kept

		/* Queue the pairs formed with the freshly merged histogram. */
		for k := uint(0); k < num_clusters; k++ {
			compareAndPushToQueueDistance(out, cluster_size, keep, clusters[k], max_num_pairs, pairs, &queued)
		}
	}

	return num_clusters
}
/* Returns the bit cost of moving histogram into candidate: the population
   cost of the two histograms added together, minus candidate.bit_cost_.
   A histogram whose total_count_ is zero costs nothing to move. */
func histogramBitCostDistanceDistance(histogram *histogramDistance, candidate *histogramDistance) float64 {
	if histogram.total_count_ == 0 {
		return 0.0
	}

	/* Accumulate into a local copy rather than into *histogram itself. */
	merged := *histogram
	histogramAddHistogramDistance(&merged, candidate)
	return populationCostDistance(&merged) - candidate.bit_cost_
}
/* Picks, for every input histogram in[0..in_size), the output histogram
   that is cheapest to move it into, stores that choice in symbols[], and then
   rebuilds the chosen output histograms from the inputs assigned to them.

   On entry clusters[0..num_clusters) holds the unique values found in
   symbols[0..in_size); that property is not preserved by this function.
   out[].bit_cost_ is assumed to be up to date on entry. */
func histogramRemapDistance(in []histogramDistance, in_size uint, clusters []uint32, num_clusters uint, out []histogramDistance, symbols []uint32) {
	for idx := uint(0); idx < in_size; idx++ {
		/* Start from the choice made for the previous input (or, for the
		   very first input, from its own current symbol). */
		seed := idx
		if idx > 0 {
			seed = idx - 1
		}
		choice := symbols[seed]
		lowest := histogramBitCostDistanceDistance(&in[idx], &out[choice])

		for c := uint(0); c < num_clusters; c++ {
			cand := clusters[c]
			cost := histogramBitCostDistanceDistance(&in[idx], &out[cand])
			if cost < lowest {
				lowest = cost
				choice = cand
			}
		}
		symbols[idx] = choice
	}

	/* Recompute each out histogram from the raw inputs and the new symbols. */
	for c := uint(0); c < num_clusters; c++ {
		histogramClearDistance(&out[clusters[c]])
	}
	for idx := uint(0); idx < in_size; idx++ {
		histogramAddHistogramDistance(&out[symbols[idx]], &in[idx])
	}
}
/* Marks a slot of the renumbering table that has not been assigned yet. */
var histogramReindexDistance_kInvalidIndex uint32 = math.MaxUint32

/* Renumbers the histograms referenced by symbols[0..length) so that they
   occupy out[0..N) in order of first appearance:
     * on entry, symbols[] holds indexes into out[] with N unique values
       (possibly N < length);
     * on return, symbols'[i] = f(symbols[i]) and
       out'[symbols'[i]] = out[symbols[i]] for each 0 <= i < length, where f
       is a bijection from the values of symbols[] onto [0..N) and the first
       occurrences of values in symbols'[] appear in increasing order.
   Returns N, the number of unique values in symbols[]. */
func histogramReindexDistance(out []histogramDistance, symbols []uint32, length uint) uint {
	remap := make([]uint32, length)
	for k := range remap {
		remap[k] = histogramReindexDistance_kInvalidIndex
	}

	/* Pass 1: hand out new indexes in order of first appearance. */
	var assigned uint32
	for k := uint(0); k < length; k++ {
		old := symbols[k]
		if remap[old] == histogramReindexDistance_kInvalidIndex {
			remap[old] = assigned
			assigned++
		}
	}

	/* Pass 2: gather the referenced histograms in their new order and
	   rewrite symbols[].
	   TODO: by using idea of "cycle-sort" we can avoid allocation of
	   the scratch slice and reduce the number of copying by the factor of 2. */
	packed := make([]histogramDistance, assigned)
	var filled uint32
	for k := uint(0); k < length; k++ {
		old := symbols[k]
		if remap[old] == filled {
			packed[filled] = out[old]
			filled++
		}
		symbols[k] = remap[old]
	}

	for k := uint32(0); k < filled; k++ {
		out[k] = packed[k]
	}
	return uint(filled)
}
/* Clusters in[0..in_size) into out[] and writes, for every input, the index
   of its cluster into histogram_symbols[]. *out_size receives the number of
   clusters produced. max_histograms is forwarded to histogramCombineDistance
   as its cluster limit.

   The work is done in four steps: combine within batches of 64 inputs,
   combine across all surviving clusters with a bounded pair queue, remap
   every input to its best cluster, and finally renumber the clusters. */
func clusterHistogramsDistance(in []histogramDistance, in_size uint, max_histograms uint, out []histogramDistance, out_size *uint, histogram_symbols []uint32) {
	const batch uint = 64

	sizes := make([]uint32, in_size)
	live := make([]uint32, in_size)
	queueCap := batch * batch / 2
	queue := make([]histogramPair, queueCap+1)

	/* Every input starts out as its own cluster of size one. */
	for k := uint(0); k < in_size; k++ {
		sizes[k] = 1
		out[k] = in[k]
		out[k].bit_cost_ = populationCostDistance(&in[k])
		histogram_symbols[k] = uint32(k)
	}

	/* First pass: within each batch all pairs are allowed. */
	var liveCount uint
	for start := uint(0); start < in_size; start += batch {
		n := brotli_min_size_t(in_size-start, batch)
		for k := uint(0); k < n; k++ {
			live[liveCount+k] = uint32(start + k)
		}
		survivors := histogramCombineDistance(out, sizes, histogram_symbols[start:], live[liveCount:], queue, n, n, max_histograms, queueCap)
		liveCount += survivors
	}

	/* Second pass: the total number of queued pairs is limited. After the
	   limit is reached only the best pair keeps being tracked. */
	pairLimit := brotli_min_size_t(64*liveCount, (liveCount/2)*liveCount)
	if need := pairLimit + 1; queueCap < need {
		/* Grow the queue by doubling, carrying over the old contents. */
		grown := queueCap
		for grown < need {
			grown *= 2
		}
		bigger := make([]histogramPair, grown)
		copy(bigger, queue[:queueCap])
		queue = bigger
	}

	/* Collapse similar histograms. */
	liveCount = histogramCombineDistance(out, sizes, histogram_symbols, live, queue, liveCount, in_size, max_histograms, pairLimit)

	/* Find the optimal map from original histograms to the final ones. */
	histogramRemapDistance(in, in_size, live, liveCount, out, histogram_symbols)

	/* Convert the context map to a canonical form. */
	*out_size = histogramReindexDistance(out, histogram_symbols, in_size)
}
+326
View File
@@ -0,0 +1,326 @@
package brotli
import "math"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Evaluates merging out[idx1] with out[idx2] and, when the merge is cheap
   enough, records the pair in the pairs queue.

   The queue keeps its best pair (per histogramPairIsLess) at pairs[0]; the
   remaining entries are in no particular order. *num_pairs is the number of
   queued pairs and never grows beyond max_num_pairs. Once the queue is full
   a new pair is only stored if it replaces pairs[0]. Nothing happens when
   idx1 == idx2. */
func compareAndPushToQueueLiteral(out []histogramLiteral, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
	if idx1 == idx2 {
		return
	}

	/* Store pairs with the smaller index first. */
	lo, hi := idx1, idx2
	if hi < lo {
		lo, hi = hi, lo
	}

	var cand histogramPair
	cand.idx1 = lo
	cand.idx2 = hi
	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[lo]), uint(cluster_size[hi]))
	cand.cost_diff -= out[lo].bit_cost_
	cand.cost_diff -= out[hi].bit_cost_

	switch {
	case out[lo].total_count_ == 0:
		/* Merging with an empty histogram costs what the other one costs. */
		cand.cost_combo = out[hi].bit_cost_
	case out[hi].total_count_ == 0:
		cand.cost_combo = out[lo].bit_cost_
	default:
		limit := 1e99
		if *num_pairs != 0 {
			limit = brotli_max_double(0.0, pairs[0].cost_diff)
		}
		merged := out[lo]
		histogramAddHistogramLiteral(&merged, &out[hi])
		mergedCost := populationCostLiteral(&merged)
		if !(mergedCost < limit-cand.cost_diff) {
			/* Not good enough to be worth queueing. */
			return
		}
		cand.cost_combo = mergedCost
	}

	cand.cost_diff += cand.cost_combo

	queued := *num_pairs
	if queued > 0 && histogramPairIsLess(&pairs[0], &cand) {
		/* The candidate becomes the new front; keep the old front if there
		   is still room for it. */
		if queued < max_num_pairs {
			pairs[queued] = pairs[0]
			*num_pairs = queued + 1
		}
		pairs[0] = cand
		return
	}
	if queued < max_num_pairs {
		pairs[queued] = cand
		*num_pairs = queued + 1
	}
}
/* Greedily merges the histograms listed in clusters[0..num_clusters) and
   returns how many clusters remain.

   Each round takes the pair at pairs[0], adds the second histogram into the
   first, rewrites every symbols[0..symbols_size) entry that named the second
   one, removes it from clusters, and refreshes the pair queue. Merging goes
   on while the front pair has a negative cost_diff; after that it continues
   unconditionally until at most max_clusters clusters are left. */
func histogramCombineLiteral(out []histogramLiteral, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
	var (
		threshold float64 = 0.0
		floor     uint    = 1
		queued    uint    = 0
	)

	/* Seed the queue with every pair of clusters. The queue keeps the pair
	   with the maximum bit cost reduction at index 0. */
	for a := uint(0); a < num_clusters; a++ {
		for b := a + 1; b < num_clusters; b++ {
			compareAndPushToQueueLiteral(out, cluster_size, clusters[a], clusters[b], max_num_pairs, pairs, &queued)
		}
	}

	for num_clusters > floor {
		if pairs[0].cost_diff >= threshold {
			/* No more beneficial merges: from now on accept any merge, but
			   stop as soon as max_clusters is reached. */
			threshold = 1e99
			floor = max_clusters
			continue
		}

		/* Take the best pair from the front of the queue. */
		keep := pairs[0].idx1
		drop := pairs[0].idx2
		histogramAddHistogramLiteral(&out[keep], &out[drop])
		out[keep].bit_cost_ = pairs[0].cost_combo
		cluster_size[keep] += cluster_size[drop]

		for k := uint(0); k < symbols_size; k++ {
			if symbols[k] == drop {
				symbols[k] = keep
			}
		}

		/* Delete the absorbed cluster, shifting the tail down by one. */
		for k := uint(0); k < num_clusters; k++ {
			if clusters[k] != drop {
				continue
			}
			copy(clusters[k:], clusters[k+1:num_clusters])
			break
		}
		num_clusters--

		/* Compact the queue in place, dropping every pair that touches
		   either of the two merged histograms. */
		kept := uint(0)
		for k := uint(0); k < queued; k++ {
			cur := pairs[k]
			if cur.idx1 == keep || cur.idx2 == keep || cur.idx1 == drop || cur.idx2 == drop {
				continue
			}
			if histogramPairIsLess(&pairs[0], &cur) {
				/* cur beats the current front: swap them. */
				front := pairs[0]
				pairs[0] = cur
				pairs[kept] = front
			} else {
				pairs[kept] = cur
			}
			kept++
		}
		queued = kept

		/* Queue the pairs formed with the freshly merged histogram. */
		for k := uint(0); k < num_clusters; k++ {
			compareAndPushToQueueLiteral(out, cluster_size, keep, clusters[k], max_num_pairs, pairs, &queued)
		}
	}

	return num_clusters
}
/* Returns the bit cost of moving histogram into candidate: the population
   cost of the two histograms added together, minus candidate.bit_cost_.
   A histogram whose total_count_ is zero costs nothing to move. */
func histogramBitCostDistanceLiteral(histogram *histogramLiteral, candidate *histogramLiteral) float64 {
	if histogram.total_count_ == 0 {
		return 0.0
	}

	/* Accumulate into a local copy rather than into *histogram itself. */
	merged := *histogram
	histogramAddHistogramLiteral(&merged, candidate)
	return populationCostLiteral(&merged) - candidate.bit_cost_
}
/* Picks, for every input histogram in[0..in_size), the output histogram
   that is cheapest to move it into, stores that choice in symbols[], and then
   rebuilds the chosen output histograms from the inputs assigned to them.

   On entry clusters[0..num_clusters) holds the unique values found in
   symbols[0..in_size); that property is not preserved by this function.
   out[].bit_cost_ is assumed to be up to date on entry. */
func histogramRemapLiteral(in []histogramLiteral, in_size uint, clusters []uint32, num_clusters uint, out []histogramLiteral, symbols []uint32) {
	for idx := uint(0); idx < in_size; idx++ {
		/* Start from the choice made for the previous input (or, for the
		   very first input, from its own current symbol). */
		seed := idx
		if idx > 0 {
			seed = idx - 1
		}
		choice := symbols[seed]
		lowest := histogramBitCostDistanceLiteral(&in[idx], &out[choice])

		for c := uint(0); c < num_clusters; c++ {
			cand := clusters[c]
			cost := histogramBitCostDistanceLiteral(&in[idx], &out[cand])
			if cost < lowest {
				lowest = cost
				choice = cand
			}
		}
		symbols[idx] = choice
	}

	/* Recompute each out histogram from the raw inputs and the new symbols. */
	for c := uint(0); c < num_clusters; c++ {
		histogramClearLiteral(&out[clusters[c]])
	}
	for idx := uint(0); idx < in_size; idx++ {
		histogramAddHistogramLiteral(&out[symbols[idx]], &in[idx])
	}
}
/* Marks a slot of the renumbering table that has not been assigned yet. */
var histogramReindexLiteral_kInvalidIndex uint32 = math.MaxUint32

/* Renumbers the histograms referenced by symbols[0..length) so that they
   occupy out[0..N) in order of first appearance:
     * on entry, symbols[] holds indexes into out[] with N unique values
       (possibly N < length);
     * on return, symbols'[i] = f(symbols[i]) and
       out'[symbols'[i]] = out[symbols[i]] for each 0 <= i < length, where f
       is a bijection from the values of symbols[] onto [0..N) and the first
       occurrences of values in symbols'[] appear in increasing order.
   Returns N, the number of unique values in symbols[]. */
func histogramReindexLiteral(out []histogramLiteral, symbols []uint32, length uint) uint {
	remap := make([]uint32, length)
	for k := range remap {
		remap[k] = histogramReindexLiteral_kInvalidIndex
	}

	/* Pass 1: hand out new indexes in order of first appearance. */
	var assigned uint32
	for k := uint(0); k < length; k++ {
		old := symbols[k]
		if remap[old] == histogramReindexLiteral_kInvalidIndex {
			remap[old] = assigned
			assigned++
		}
	}

	/* Pass 2: gather the referenced histograms in their new order and
	   rewrite symbols[].
	   TODO: by using idea of "cycle-sort" we can avoid allocation of
	   the scratch slice and reduce the number of copying by the factor of 2. */
	packed := make([]histogramLiteral, assigned)
	var filled uint32
	for k := uint(0); k < length; k++ {
		old := symbols[k]
		if remap[old] == filled {
			packed[filled] = out[old]
			filled++
		}
		symbols[k] = remap[old]
	}

	for k := uint32(0); k < filled; k++ {
		out[k] = packed[k]
	}
	return uint(filled)
}
/* Clusters in[0..in_size) into out[] and writes, for every input, the index
   of its cluster into histogram_symbols[]. *out_size receives the number of
   clusters produced. max_histograms is forwarded to histogramCombineLiteral
   as its cluster limit.

   The work is done in four steps: combine within batches of 64 inputs,
   combine across all surviving clusters with a bounded pair queue, remap
   every input to its best cluster, and finally renumber the clusters. */
func clusterHistogramsLiteral(in []histogramLiteral, in_size uint, max_histograms uint, out []histogramLiteral, out_size *uint, histogram_symbols []uint32) {
	const batch uint = 64

	sizes := make([]uint32, in_size)
	live := make([]uint32, in_size)
	queueCap := batch * batch / 2
	queue := make([]histogramPair, queueCap+1)

	/* Every input starts out as its own cluster of size one. */
	for k := uint(0); k < in_size; k++ {
		sizes[k] = 1
		out[k] = in[k]
		out[k].bit_cost_ = populationCostLiteral(&in[k])
		histogram_symbols[k] = uint32(k)
	}

	/* First pass: within each batch all pairs are allowed. */
	var liveCount uint
	for start := uint(0); start < in_size; start += batch {
		n := brotli_min_size_t(in_size-start, batch)
		for k := uint(0); k < n; k++ {
			live[liveCount+k] = uint32(start + k)
		}
		survivors := histogramCombineLiteral(out, sizes, histogram_symbols[start:], live[liveCount:], queue, n, n, max_histograms, queueCap)
		liveCount += survivors
	}

	/* Second pass: the total number of queued pairs is limited. After the
	   limit is reached only the best pair keeps being tracked. */
	pairLimit := brotli_min_size_t(64*liveCount, (liveCount/2)*liveCount)
	if need := pairLimit + 1; queueCap < need {
		/* Grow the queue by doubling, carrying over the old contents. */
		grown := queueCap
		for grown < need {
			grown *= 2
		}
		bigger := make([]histogramPair, grown)
		copy(bigger, queue[:queueCap])
		queue = bigger
	}

	/* Collapse similar histograms. */
	liveCount = histogramCombineLiteral(out, sizes, histogram_symbols, live, queue, liveCount, in_size, max_histograms, pairLimit)

	/* Find the optimal map from original histograms to the final ones. */
	histogramRemapLiteral(in, in_size, live, liveCount, out, histogram_symbols)

	/* Convert the context map to a canonical form. */
	*out_size = histogramReindexLiteral(out, histogram_symbols, in_size)
}
+254
View File
@@ -0,0 +1,254 @@
package brotli
/* Smallest insert length covered by each of the 24 insert length codes,
   indexed by the code that getInsertLengthCode returns. */
var kInsBase = []uint32{
	0, 1, 2, 3, 4, 5, 6, 8,
	10, 14, 18, 26, 34, 50, 66, 98,
	130, 194, 322, 578, 1090, 2114, 6210, 22594,
}
/* Number of extra bits that follow each of the 24 insert length codes,
   indexed like kInsBase. */
var kInsExtra = []uint32{
	0, 0, 0, 0, 0, 0, 1, 1,
	2, 2, 3, 3, 4, 4, 5, 5,
	6, 7, 8, 9, 10, 12, 14, 24,
}
/* Smallest copy length covered by each of the 24 copy length codes,
   indexed by the code that getCopyLengthCode returns. */
var kCopyBase = []uint32{
	2, 3, 4, 5, 6, 7, 8, 9,
	10, 12, 14, 18, 22, 30, 38, 54,
	70, 102, 134, 198, 326, 582, 1094, 2118,
}
/* Number of extra bits that follow each of the 24 copy length codes,
   indexed like kCopyBase. */
var kCopyExtra = []uint32{
	0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 2, 2, 3, 3, 4, 4,
	5, 5, 6, 7, 8, 9, 10, 24,
}
/* Maps an insert length to its insert length code (0..23). Lengths below 6
   are their own code; the following ranges are bucketed logarithmically, and
   the three largest buckets start at 2114, 6210 and 22594. */
func getInsertLengthCode(insertlen uint) uint16 {
	switch {
	case insertlen < 6:
		return uint16(insertlen)
	case insertlen < 130:
		tail := insertlen - 2
		nbits := log2FloorNonZero(tail) - 1
		return uint16((nbits << 1) + uint32(tail>>nbits) + 2)
	case insertlen < 2114:
		return uint16(log2FloorNonZero(insertlen-66) + 10)
	case insertlen < 6210:
		return 21
	case insertlen < 22594:
		return 22
	}
	return 23
}
/* Maps a copy length to its copy length code (0..23). Lengths below 10 map
   to copylen - 2, so callers are expected to pass copylen >= 2; larger
   lengths are bucketed logarithmically and everything from 2118 up shares
   code 23. */
func getCopyLengthCode(copylen uint) uint16 {
	switch {
	case copylen < 10:
		return uint16(copylen - 2)
	case copylen < 134:
		tail := copylen - 6
		nbits := log2FloorNonZero(tail) - 1
		return uint16((nbits << 1) + uint32(tail>>nbits) + 4)
	case copylen < 2118:
		return uint16(log2FloorNonZero(copylen-70) + 12)
	}
	return 23
}
/* Combines an insert length code and a copy length code into a single
   command symbol. The low six bits always hold the low three bits of both
   codes; the upper bits select the cell of the insert/copy code table.
   Symbols below 128 are only produced when use_last_distance is set and the
   codes are small enough (inscode < 8, copycode < 16). */
func combineLengthCodes(inscode uint16, copycode uint16, use_last_distance bool) uint16 {
	low := (copycode & 0x7) | ((inscode & 0x7) << 3)

	if !use_last_distance || inscode >= 8 || copycode >= 16 {
		/* Specification: 5 Encoding of ... (last table) */
		/* cell = 2 * index, where index is in range [0..8] */
		cell := 2 * ((uint32(copycode) >> 3) + 3*(uint32(inscode)>>3))

		/* All values in specification are K * 64,
		   where   K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
		       i + 1 = [1, 2, 3, 4, 5, 6, 7, 8, 9],
		   K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] = D.
		   All values in D require only 2 bits to encode.
		   Magic constant is shifted 6 bits left, to avoid final multiplication. */
		cell = (cell << 5) + 0x40 + ((0x520D40 >> cell) & 0xC0)
		return uint16(cell | uint32(low))
	}

	if copycode >= 8 {
		return low | 64
	}
	return low
}
func getLengthCode(insertlen uint, copylen uint, use_last_distance bool, code *uint16) {
var inscode uint16 = getInsertLengthCode(insertlen)
var copycode uint16 = getCopyLengthCode(copylen)
*code = combineLengthCodes(inscode, copycode, use_last_distance)
}
/* Returns the kInsBase entry for inscode. Indexing panics when inscode is
   outside the table. */
func getInsertBase(inscode uint16) uint32 {
return kInsBase[inscode]
}
/* Returns the kInsExtra entry for inscode. Indexing panics when inscode is
   outside the table. */
func getInsertExtra(inscode uint16) uint32 {
return kInsExtra[inscode]
}
/* Returns the kCopyBase entry for copycode. Indexing panics when copycode is
   outside the table. */
func getCopyBase(copycode uint16) uint32 {
return kCopyBase[copycode]
}
/* Returns the kCopyExtra entry for copycode. Indexing panics when copycode
   is outside the table. */
func getCopyExtra(copycode uint16) uint32 {
return kCopyExtra[copycode]
}
/* One insert-and-copy command as built by makeCommand / makeInsertCommand. */
type command struct {
/* Insert length, as passed to makeCommand / makeInsertCommand. */
insert_len_ uint32
/* Low 25 bits: copy length (see commandCopyLen). Upper 7 bits: the
   copylen_code_delta given to makeCommand, truncated to 7 bits; it is
   sign-extended again by commandCopyLenCode. */
copy_len_ uint32
/* Distance extra bits, filled in by prefixEncodeCopyDistance. */
dist_extra_ uint32
/* Combined insert/copy length symbol, filled in by getLengthCode. */
cmd_prefix_ uint16
/* Low 10 bits: distance code. Upper 6 bits: number of distance extra bits
   (this is how commandRestoreDistanceCode splits the field). */
dist_prefix_ uint16
}
/* Builds a command with the given insert length, copy length and distance.
   distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
   copylen_code_delta is added to copylen when the length code is computed
   and is kept in the upper 7 bits of copy_len_. */
func makeCommand(dist *distanceParams, insertlen uint, copylen uint, copylen_code_delta int, distance_code uint) (cmd command) {
	/* Don't rely on signed int representation, use honest casts. */
	deltaBits := uint32(uint8(int8(copylen_code_delta)))

	cmd.insert_len_ = uint32(insertlen)
	cmd.copy_len_ = uint32(copylen) | deltaBits<<25

	/* The distance prefix and extra bits are stored in this Command as if
	   npostfix and ndirect were 0, they are only recomputed later after the
	   clustering if needed. */
	prefixEncodeCopyDistance(distance_code, uint(dist.num_direct_distance_codes), uint(dist.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_)

	codedCopyLen := uint(int(copylen) + copylen_code_delta)
	usesLastDistance := cmd.dist_prefix_&0x3FF == 0
	getLengthCode(insertlen, codedCopyLen, usesLastDistance, &cmd.cmd_prefix_)
	return cmd
}
/* Builds a command for insertlen literals. copy_len_ is set to 4 << 25
   (copy length 0, code delta 4), the distance prefix to
   numDistanceShortCodes, and the length code is computed for a copy length
   of 4. */
func makeInsertCommand(insertlen uint) (cmd command) {
	cmd = command{
		insert_len_:  uint32(insertlen),
		copy_len_:    4 << 25,
		dist_extra_:  0,
		dist_prefix_: numDistanceShortCodes,
	}
	getLengthCode(insertlen, 4, false, &cmd.cmd_prefix_)
	return cmd
}
/* Rebuilds the distance code from the prefix and extra bits stored in the
   command. Short codes and direct codes are returned as they are; for all
   other codes the extra bits are folded back in using dist's postfix bit
   count and number of direct distance codes. */
func commandRestoreDistanceCode(self *command, dist *distanceParams) uint32 {
	dcode := uint32(self.dist_prefix_) & 0x3FF
	if dcode < numDistanceShortCodes+dist.num_direct_distance_codes {
		return dcode
	}

	nbits := uint32(self.dist_prefix_) >> 10
	var postfixMask uint32 = (1 << dist.distance_postfix_bits) - 1

	/* Position of the code relative to the first non-direct code. */
	rel := dcode - dist.num_direct_distance_codes - numDistanceShortCodes
	hcode := rel >> dist.distance_postfix_bits
	lcode := rel & postfixMask

	offset := ((2 + (hcode & 1)) << nbits) - 4
	return ((offset + self.dist_extra_) << dist.distance_postfix_bits) + lcode + dist.num_direct_distance_codes + numDistanceShortCodes
}
/* Returns the distance context (0..3) of the command. The low three bits of
   cmd_prefix_ are returned when they are at most 2 and cmd_prefix_ >> 6 is
   one of 0, 2, 4 or 7; every other command gets context 3. */
func commandDistanceContext(self *command) uint32 {
	low := uint32(self.cmd_prefix_) & 7
	if low > 2 {
		return 3
	}
	switch uint32(self.cmd_prefix_) >> 6 {
	case 0, 2, 4, 7:
		return low
	}
	return 3
}
/* Returns the copy length: the low 25 bits of copy_len_. The upper 7 bits
   hold the length code delta stored by makeCommand and are masked off. */
func commandCopyLen(self *command) uint32 {
return self.copy_len_ & 0x1FFFFFF
}
/* Returns the copy length plus the signed delta kept in the upper 7 bits of
   copy_len_, i.e. the length that was used when the length code was chosen
   in makeCommand. */
func commandCopyLenCode(self *command) uint32 {
	stored := self.copy_len_ >> 25

	/* Duplicate bit 6 into bit 7 so that the 7-bit value becomes a properly
	   sign-extended 8-bit one. */
	widened := uint8(stored | (stored&0x40)<<1)
	delta := int32(int8(widened))

	return uint32(int32(self.copy_len_&0x1FFFFFF) + delta)
}
+834
View File
@@ -0,0 +1,834 @@
package brotli
import "encoding/binary"
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream.
Adapted from the CompressFragment() function in
https://github.com/google/snappy/blob/master/snappy.cc */
/* Numerically (1 << 18) - 16. Presumably the largest backward distance the
   fragment compressor emits - NOTE(review): its uses are outside this part
   of the file, confirm against them. */
const maxDistance_compress_fragment = 262128
/* Hashes the first five bytes of p. Eight bytes are loaded (so p must hold
   at least eight), the top three are shifted out, the rest is multiplied by
   kHashMul32, and the product is shifted right by shift. */
func hash5(p []byte, shift uint) uint32 {
	word := binary.LittleEndian.Uint64(p)
	mixed := (word << 24) * uint64(kHashMul32)
	return uint32(mixed >> shift)
}
/* Same hash as hash5, but computed from an already loaded 64-bit word:
   hashes the five bytes of v that start at byte offset (0..3). */
func hashBytesAtOffset5(v uint64, offset int, shift uint) uint32 {
	assert(offset >= 0)
	assert(offset <= 3)

	window := v >> uint(8*offset)
	mixed := (window << 24) * uint64(kHashMul32)
	return uint32(mixed >> shift)
}
/* Reports whether the first five bytes of p1 and p2 are equal. The first
   four are compared as one 32-bit word; the fifth is only looked at when
   those agree. */
func isMatch5(p1 []byte, p2 []byte) bool {
	if binary.LittleEndian.Uint32(p1) != binary.LittleEndian.Uint32(p2) {
		return false
	}
	return p1[4] == p2[4]
}
/* Builds a literal prefix code into "depths" and "bits" based on the
   statistics of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs (1 << 15 bytes and up) the histogram is built
   from every 29th byte only, and every literal then gets a non-zero count.
   Returns estimated compression ratio millibytes/char for encoding given
   input with generated code. */
func buildAndStoreLiteralPrefixCode(input []byte, input_size uint, depths []byte, bits []uint16, storage_ix *uint, storage []byte) uint {
	var counts [256]uint32

	/* Short inputs are counted in full; long ones are sampled, and since a
	   sample cannot tell whether a symbol occurs at all, each symbol then
	   gets one extra count to avoid 0 bit depths. */
	stride := uint(1)
	bias := uint32(0)
	if input_size >= 1<<15 {
		stride = 29
		bias = 1
	}

	for pos := uint(0); pos < input_size; pos += stride {
		counts[input[pos]]++
	}
	total := (input_size + stride - 1) / stride

	for sym := range counts {
		/* We weigh the first 11 samples with weight 3 to account for the
		   balancing effect of the LZ77 phase on the histogram (more frequent
		   symbols are more likely to be in backward references instead as
		   literals). */
		boost := bias + 2*brotli_min_uint32_t(counts[sym], 11)
		counts[sym] += boost
		total += uint(boost)
	}

	buildAndStoreHuffmanTreeFast(counts[:], total, /* max_bits = */ 8, depths, bits, storage_ix, storage)

	var weighted uint
	for sym := range counts {
		if counts[sym] == 0 {
			continue
		}
		weighted += uint(counts[sym] * uint32(depths[sym]))
	}

	/* Estimated encoding ratio, millibytes per symbol. */
	return (weighted * 125) / total
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth"
   and "bits" based on "histogram" and stores it into the bit stream.
   Entries [0..64) of histogram/depth/bits belong to the command code and
   entries [64..128) to the distance code. */
func buildAndStoreCommandPrefixCode1(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
	var tree [129]huffmanTree
	var fullDepth [numCommandSymbols]byte
	var ordered [64]uint16

	createHuffmanTree(histogram, 64, 15, tree[:], depth)
	createHuffmanTree(histogram[64:], 64, 14, tree[:], depth[64:])

	/* We have to jump through a few hoops here in order to compute
	   the command bits because the symbols are in a different order than in
	   the full alphabet. This looks complicated, but having the symbols
	   in this order in the command bits saves a few branches in the Emit*
	   functions. */
	copy(fullDepth[:], depth[:24])
	copy(fullDepth[24:], depth[40:48])
	copy(fullDepth[32:], depth[24:32])
	copy(fullDepth[40:], depth[48:56])
	copy(fullDepth[48:], depth[32:40])
	copy(fullDepth[56:], depth[56:64])
	convertBitDepthsToSymbols(fullDepth[:], 64, ordered[:])

	/* Undo the reordering for the resulting bit patterns. */
	copy(bits, ordered[:24])
	copy(bits[24:], ordered[32:40])
	copy(bits[32:], ordered[48:56])
	copy(bits[40:], ordered[24:32])
	copy(bits[48:], ordered[40:48])
	copy(bits[56:], ordered[56:64])
	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])

	/* Create the bit length array for the full command alphabet.
	   Only the first 64 values were used above, so only those are reset. */
	for k := range fullDepth[:64] {
		fullDepth[k] = 0
	}
	copy(fullDepth[:], depth[:8])
	copy(fullDepth[64:], depth[8:16])
	copy(fullDepth[128:], depth[16:24])
	copy(fullDepth[192:], depth[24:32])
	copy(fullDepth[384:], depth[32:40])
	for k := 0; k < 8; k++ {
		fullDepth[128+8*k] = depth[40+k]
		fullDepth[256+8*k] = depth[48+k]
		fullDepth[448+8*k] = depth[56+k]
	}
	storeHuffmanTree(fullDepth[:], numCommandSymbols, tree[:], storage_ix, storage)

	storeHuffmanTree(depth[64:], 64, tree[:], storage_ix, storage)
}
/* Writes the insert length code for insertlen, plus its extra bits, to the
   bit stream and counts the code in histo. Insert codes sit at symbols
   40..61 of depth/bits/histo.
   REQUIRES: insertlen < 6210 */
func emitInsertLen1(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	var (
		code  uint
		nbits uint
		extra uint64
	)

	switch {
	case insertlen < 6:
		/* These codes carry no extra bits. */
		code = insertlen + 40
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		histo[code]++
		return
	case insertlen < 130:
		tail := insertlen - 2
		n := log2FloorNonZero(tail) - 1
		top := tail >> n
		code = uint((n << 1) + uint32(top) + 42)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(top) << n)
	case insertlen < 2114:
		tail := insertlen - 66
		n := log2FloorNonZero(tail)
		code = uint(n + 50)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(1) << n)
	default:
		code = 61
		nbits = 12
		extra = uint64(insertlen) - 2114
	}

	writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
	writeBits(nbits, extra, storage_ix, storage)
	histo[code]++
}
/* Writes one of the two largest insert length codes: symbol 62 with 14
   extra bits for insertlen below 22594 (offset from 6210), otherwise symbol
   63 with 24 extra bits (offset from 22594). The code is counted in histo. */
func emitLongInsertLen(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	code, nbits, base := uint(63), uint(24), uint64(22594)
	if insertlen < 22594 {
		code, nbits, base = 62, 14, 6210
	}

	writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
	writeBits(nbits, uint64(insertlen)-base, storage_ix, storage)
	histo[code]++
}
/* Writes the copy length code for copylen, plus its extra bits, to the bit
   stream and counts the code in histo. The codes used here sit at symbols
   up to 39 of depth/bits/histo; lengths below 10 map to symbol
   copylen + 14. */
func emitCopyLen1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	var (
		code  uint
		nbits uint
		extra uint64
	)

	switch {
	case copylen < 10:
		/* These codes carry no extra bits. */
		code = copylen + 14
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		histo[code]++
		return
	case copylen < 134:
		tail := copylen - 6
		n := log2FloorNonZero(tail) - 1
		top := tail >> n
		code = uint((n << 1) + uint32(top) + 20)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(top) << n)
	case copylen < 2118:
		tail := copylen - 70
		n := log2FloorNonZero(tail)
		code = uint(n + 28)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(1) << n)
	default:
		code = 39
		nbits = 24
		extra = uint64(copylen) - 2118
	}

	writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
	writeBits(nbits, extra, storage_ix, storage)
	histo[code]++
}
/* Writes the code for a copy of copylen bytes that reuses the last
   distance, and counts every emitted symbol in histo. Lengths below 72 are
   covered by a single symbol; longer ones are written as a copy length
   symbol followed by symbol 64. */
func emitCopyLenLastDistance1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	var (
		code       uint
		nbits      uint
		extra      uint64
		withSym64  bool
	)

	switch {
	case copylen < 12:
		/* These codes carry no extra bits. */
		code = copylen - 4
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		histo[code]++
		return
	case copylen < 72:
		tail := copylen - 8
		n := log2FloorNonZero(tail) - 1
		top := tail >> n
		code = uint((n << 1) + uint32(top) + 4)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(top) << n)
	case copylen < 136:
		tail := copylen - 8
		code = (tail >> 5) + 30
		nbits = 5
		extra = uint64(tail) & 31
		withSym64 = true
	case copylen < 2120:
		tail := copylen - 72
		n := log2FloorNonZero(tail)
		code = uint(n + 28)
		nbits = uint(n)
		extra = uint64(tail) - (uint64(1) << n)
		withSym64 = true
	default:
		code = 39
		nbits = 24
		extra = uint64(copylen) - 2120
		withSym64 = true
	}

	writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
	writeBits(nbits, extra, storage_ix, storage)
	if withSym64 {
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
	}
	histo[code]++
	if withSym64 {
		histo[64]++
	}
}
/* emitDistance1 writes the prefix code and the extra bits of a backward
   distance to the bit stream and counts the emitted symbol in histo.
   The code is derived from distance + 3: the number of extra bits is
   floor(log2(distance + 3)) - 1 and the bit just below the leading one
   selects between the two symbols of that bucket. Symbols start at 80. */
func emitDistance1(distance uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	shifted := distance + 3
	extra_bits := log2FloorNonZero(shifted) - 1
	half := (shifted >> extra_bits) & 1
	bucket_start := (2 + half) << extra_bits
	symbol := uint(2*(extra_bits-1) + uint32(half) + 80)
	writeBits(uint(depth[symbol]), uint64(bits[symbol]), storage_ix, storage)
	writeBits(uint(extra_bits), uint64(shifted)-uint64(bucket_start), storage_ix, storage)
	histo[symbol]++
}
/* emitLiterals writes the first len bytes of input to the bit stream, each
   encoded with the prefix code given by depth (code length) and bits (code
   value), both indexed by the byte value. */
func emitLiterals(input []byte, len uint, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	for pos := uint(0); pos < len; pos++ {
		symbol := input[pos]
		writeBits(uint(depth[symbol]), uint64(bits[symbol]), storage_ix, storage)
	}
}
/* storeMetaBlockHeader1 writes a non-last meta-block header: the ISLAST bit
   (always 0), a 2-bit field holding the nibble count minus 4, the length
   minus 1 in 4, 5 or 6 nibbles, and finally the ISUNCOMPRESSED bit.
   REQUIRES: len <= 1 << 24. */
func storeMetaBlockHeader1(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
	/* Pick the smallest nibble count that can hold len - 1. */
	var mlen_nibbles uint
	switch {
	case len <= 1<<16:
		mlen_nibbles = 4
	case len <= 1<<20:
		mlen_nibbles = 5
	default:
		mlen_nibbles = 6
	}

	writeBits(1, 0, storage_ix, storage) /* ISLAST */
	writeBits(2, uint64(mlen_nibbles)-4, storage_ix, storage)
	writeBits(mlen_nibbles*4, uint64(len)-1, storage_ix, storage)
	writeSingleBit(is_uncompressed, storage_ix, storage) /* ISUNCOMPRESSED */
}
/* updateBits overwrites n_bits bits of array, starting at bit position pos,
   with the low n_bits bits of bits (least significant bit first). Bits of the
   touched bytes that lie outside the range keep their value. */
func updateBits(n_bits uint, bits uint32, pos uint, array []byte) {
	for n_bits > 0 {
		index := pos >> 3
		low_kept := pos & 7
		/* Never cross a byte boundary within one step. */
		taken := brotli_min_size_t(n_bits, 8-low_kept)

		var below uint32 = (1 << low_kept) - 1
		var above uint32 = ^((1 << (low_kept + taken)) - 1)
		var field uint32 = (1 << taken) - 1

		preserved := uint32(array[index]) & (above | below)
		fresh := bits & field
		array[index] = byte(fresh<<low_kept | preserved)

		n_bits -= taken
		bits >>= taken
		pos += taken
	}
}
/* rewindBitPosition1 moves the bit position *storage_ix to new_storage_ix
   and clears, in the byte containing the new position, every bit at or above
   that position. Other bytes of storage are left untouched. */
func rewindBitPosition1(new_storage_ix uint, storage_ix *uint, storage []byte) {
	keep := byte((uint(1) << (new_storage_ix & 7)) - 1)
	storage[new_storage_ix>>3] &= keep
	*storage_ix = new_storage_ix
}
/* Distance between sampled bytes in shouldMergeBlock. */
var shouldMergeBlock_kSampleRate uint = 43

/* shouldMergeBlock samples every shouldMergeBlock_kSampleRate-th byte of
   data[:len], builds a histogram of the samples and compares a cost estimate
   based on the given literal code lengths (depths) against an estimate based
   on the sample statistics. It returns true when the resulting score is
   non-negative. */
func shouldMergeBlock(data []byte, len uint, depths []byte) bool {
	var sampled [256]uint
	for pos := uint(0); pos < len; pos += shouldMergeBlock_kSampleRate {
		sampled[data[pos]]++
	}

	samples := (len + shouldMergeBlock_kSampleRate - 1) / shouldMergeBlock_kSampleRate
	score := (fastLog2(samples)+0.5)*float64(samples) + 200
	for symbol, count := range sampled {
		score -= float64(count) * (float64(depths[symbol]) + fastLog2(count))
	}
	return score >= 0.0
}
/* shouldUseUncompressedMode reports whether the current meta-block should be
   stored uncompressed. The number of bytes between metablock_start and
   next_emit is derived from the difference of the slice capacities, so both
   slices must share a backing array. The answer is true only when 50 times
   that byte count does not exceed insertlen and literal_ratio is above 980. */
func shouldUseUncompressedMode(metablock_start []byte, next_emit []byte, insertlen uint, literal_ratio uint) bool {
	already_coded := uint(cap(metablock_start) - cap(next_emit))
	return already_coded*50 <= insertlen && literal_ratio > 980
}
/* emitUncompressedMetaBlock1 discards everything written since bit position
   storage_ix_start and stores the bytes between begin and end as one
   uncompressed meta-block instead. The byte count is the difference of the
   slice capacities, so begin and end must share a backing array. After the
   header the position is rounded up to a byte boundary, the raw bytes are
   copied, and the byte following them is zeroed. */
func emitUncompressedMetaBlock1(begin []byte, end []byte, storage_ix_start uint, storage_ix *uint, storage []byte) {
	size := uint(cap(begin) - cap(end))

	rewindBitPosition1(storage_ix_start, storage_ix, storage)
	storeMetaBlockHeader1(size, true, storage_ix, storage)

	*storage_ix = (*storage_ix + 7) &^ 7
	copy(storage[*storage_ix>>3:], begin[:size])
	*storage_ix += size << 3

	storage[*storage_ix>>3] = 0
}
/* Initial values of the 128-entry command/distance histogram; it is copied
   into the working histogram before commands are emitted. Sixteen entries
   per row, so row r starts at index 16 * r. */
var kCmdHistoSeed = [128]uint32{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
}
/* Upper bound for the size of the first block of every meta-block. */
var compressFragmentFastImpl_kFirstBlockSize uint = 3 << 15
/* Upper bound for the size of each further block that may be merged into the
current meta-block. */
var compressFragmentFastImpl_kMergeBlockSize uint = 1 << 16
/* compressFragmentFastImpl compresses in[:input_size] into "storage",
starting at bit position *storage_ix, as one or more meta-blocks.
The input is processed block by block. For every block the function searches
for 5-byte matches through the hash table "table" (which maps a hash to a
position relative to base_ip) and emits insert/copy commands encoded with
"cmd_depth"/"cmd_bits" and literals encoded with a per-meta-block literal
prefix code. At the end of a block the current meta-block is either extended
by the next block (shouldMergeBlock; the stored MLEN is patched in place) or
closed. A meta-block whose literals dominate is rewritten as an uncompressed
meta-block (shouldUseUncompressedMode).
"table_bits" is the base-2 logarithm of the hash table size; the hash shift
is 64 - table_bits.
If "is_last" is false, "cmd_depth"/"cmd_bits" are rebuilt from the command
histogram of this fragment and their stored form is written to "cmd_code",
with its bit length in *cmd_code_numbits.
Control flow is driven by the labels emit_commands, trawl, emit_remainder and
next_block. */
func compressFragmentFastImpl(in []byte, input_size uint, is_last bool, table []int, table_bits uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
var cmd_histo [128]uint32
var ip_end int
var next_emit int = 0
var base_ip int = 0
var input int = 0
const kInputMarginBytes uint = windowGap
const kMinMatchLen uint = 5
var metablock_start int = input
var block_size uint = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
var total_block_size uint = block_size
/* The header starts with 1 bit ISLAST and 2 bits for the nibble count, so
MLEN begins 3 bits after the start of the header. */
var mlen_storage_ix uint = *storage_ix + 3
var lit_depth [256]byte
var lit_bits [256]uint16
var literal_ratio uint
var ip int
var last_distance int
var shift uint = 64 - table_bits
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
/* Save the start of the first block for position and distance computations.
*/
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
storeMetaBlockHeader1(block_size, false, storage_ix, storage)
/* No block splits, no contexts. */
writeBits(13, 0, storage_ix, storage)
literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
{
/* Store the pre-compressed command and distance prefix codes. */
var i uint
for i = 0; i+7 < *cmd_code_numbits; i += 8 {
writeBits(8, uint64(cmd_code[i>>3]), storage_ix, storage)
}
}
/* Write the remaining (*cmd_code_numbits mod 8) bits of the stored codes. */
writeBits(*cmd_code_numbits&7, uint64(cmd_code[*cmd_code_numbits>>3]), storage_ix, storage)
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
emit_commands:
copy(cmd_histo[:], kCmdHistoSeed[:])
/* "ip" is the input pointer. */
ip = input
/* -1 means "no previous distance": the candidate derived from it lies after
ip and is rejected by the candidate < ip check below. */
last_distance = -1
ip_end = int(uint(input) + block_size)
if block_size >= kInputMarginBytes {
var len_limit uint = brotli_min_size_t(block_size-kMinMatchLen, input_size-kInputMarginBytes)
var ip_limit int = int(uint(input) + len_limit)
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
var next_hash uint32
ip++
for next_hash = hash5(in[ip:], shift); ; {
var skip uint32 = 32
var next_ip int = ip
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc.. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
var candidate int
assert(next_emit < ip)
trawl:
for {
var hash uint32 = next_hash
var bytes_between_hash_lookups uint32 = skip >> 5
skip++
assert(hash == hash5(in[next_ip:], shift))
ip = next_ip
next_ip = int(uint32(ip) + bytes_between_hash_lookups)
if next_ip > ip_limit {
goto emit_remainder
}
next_hash = hash5(in[next_ip:], shift)
/* First try the position at the most recently used distance. */
candidate = ip - last_distance
if isMatch5(in[ip:], in[candidate:]) {
if candidate < ip {
table[hash] = int(ip - base_ip)
break
}
}
/* Otherwise try the position remembered in the hash table, and record the
current position there. */
candidate = base_ip + table[hash]
assert(candidate >= base_ip)
assert(candidate < ip)
table[hash] = int(ip - base_ip)
if isMatch5(in[ip:], in[candidate:]) {
break
}
}
/* Check copy distance. If candidate is not feasible, continue search.
Checking is done outside of hot loop to reduce overhead. */
if ip-candidate > maxDistance_compress_fragment {
goto trawl
}
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next match
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
{
var base int = ip
/* > 0 */
var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
var distance int = int(base - candidate)
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
var insert uint = uint(base - next_emit)
ip += int(matched)
if insert < 6210 {
emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
/* Replace everything written for this meta-block by an uncompressed
meta-block covering [metablock_start, base), then start a new meta-block
at "base". */
emitUncompressedMetaBlock1(in[metablock_start:], in[base:], mlen_storage_ix-3, storage_ix, storage)
input_size -= uint(base - input)
input = base
next_emit = input
goto next_block
} else {
emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
}
emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
if distance == last_distance {
/* Same distance as the previous copy: emit symbol 64 instead of a full
distance code. */
writeBits(uint(cmd_depth[64]), uint64(cmd_bits[64]), storage_ix, storage)
cmd_histo[64]++
} else {
emitDistance1(uint(distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
last_distance = distance
}
emitCopyLenLastDistance1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
next_emit = ip
if ip >= ip_limit {
goto emit_remainder
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
table[prev_hash] = int(ip - base_ip - 3)
prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
table[prev_hash] = int(ip - base_ip - 2)
prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
table[prev_hash] = int(ip - base_ip - 1)
candidate = base_ip + table[cur_hash]
table[cur_hash] = int(ip - base_ip)
}
}
/* Keep emitting copies for as long as a match starts exactly at ip. */
for isMatch5(in[ip:], in[candidate:]) {
var base int = ip
/* We have a 5-byte match at ip, and no need to emit any literal bytes
prior to ip. */
var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
if ip-candidate > maxDistance_compress_fragment {
break
}
ip += int(matched)
last_distance = int(base - candidate) /* > 0 */
emitCopyLen1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
emitDistance1(uint(last_distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
next_emit = ip
if ip >= ip_limit {
goto emit_remainder
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
table[prev_hash] = int(ip - base_ip - 3)
prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
table[prev_hash] = int(ip - base_ip - 2)
prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
table[prev_hash] = int(ip - base_ip - 1)
candidate = base_ip + table[cur_hash]
table[cur_hash] = int(ip - base_ip)
}
}
ip++
next_hash = hash5(in[ip:], shift)
}
}
emit_remainder:
assert(next_emit <= ip_end)
/* Advance past the block just processed and size the next candidate block. */
input += int(block_size)
input_size -= block_size
block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kMergeBlockSize)
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
if input_size > 0 && total_block_size+block_size <= 1<<20 && shouldMergeBlock(in[input:], block_size, lit_depth[:]) {
assert(total_block_size > 1<<16)
/* Update the size of the current meta-block and continue emitting commands.
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size
updateBits(20, uint32(total_block_size-1), mlen_storage_ix, storage)
goto emit_commands
}
/* Emit the remaining bytes as literals. */
if next_emit < ip_end {
var insert uint = uint(ip_end - next_emit)
if insert < 6210 {
emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
emitUncompressedMetaBlock1(in[metablock_start:], in[ip_end:], mlen_storage_ix-3, storage_ix, storage)
} else {
emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
}
}
next_emit = ip_end
/* If we have more data, write a new meta-block header and prefix codes and
then continue emitting commands. */
next_block:
if input_size > 0 {
metablock_start = input
block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
total_block_size = block_size
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3
storeMetaBlockHeader1(block_size, false, storage_ix, storage)
/* No block splits, no contexts. */
writeBits(13, 0, storage_ix, storage)
literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
/* Rebuild the command and distance codes from the histogram gathered so far
and store them directly in the output stream. */
buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, storage_ix, storage)
goto emit_commands
}
if !is_last {
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
/* Here "cmd_code" is the output buffer and *cmd_code_numbits the bit
position, both reset to the start before the codes are stored. */
cmd_code[0] = 0
*cmd_code_numbits = 0
buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, cmd_code_numbits, cmd_code)
}
}
/* compressFragmentFast compresses the "input" string to the "storage" buffer
   as one or more complete meta-blocks, and updates the "*storage_ix" bit
   position.

   If "is_last" is true, an additional empty last meta-block is emitted.

   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
   (see comment in encode.h) used for the encoding of this input fragment.
   If "is_last" is false, they are updated to reflect the statistics
   of this input fragment, to be used for the encoding of the next fragment.

   "*cmd_code_numbits" is the number of bits of the compressed representation
   of the command and distance prefix codes, and "cmd_code" is an array of
   at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
   command and distance prefix codes. If "is_last" is false, these are also
   updated to represent the updated "cmd_depth" and "cmd_bits".

   REQUIRES: "input_size" is greater than zero, or "is_last" is true.
   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
   REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
   OUTPUT: maximal copy distance <= |input_size|
   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
func compressFragmentFast(input []byte, input_size uint, is_last bool, table []int, table_size uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
	start_ix := *storage_ix
	table_bits := uint(log2FloorNonZero(table_size))

	/* Writes an empty last meta-block and pads to the next byte boundary. */
	emit_empty_last := func() {
		writeBits(1, 1, storage_ix, storage) /* islast */
		writeBits(1, 1, storage_ix, storage) /* isempty */
		*storage_ix = (*storage_ix + 7) &^ 7
	}

	if input_size == 0 {
		assert(is_last)
		emit_empty_last()
		return
	}

	compressFragmentFastImpl(input, input_size, is_last, table, table_bits, cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage)

	/* If output is larger than single uncompressed block, rewrite it. */
	if written := *storage_ix - start_ix; written > 31+(input_size<<3) {
		emitUncompressedMetaBlock1(input, input[input_size:], start_ix, storage_ix, storage)
	}

	if is_last {
		emit_empty_last()
	}
}
+773
View File
@@ -0,0 +1,773 @@
package brotli
import "encoding/binary"
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
/* Block size constant of the two-pass fragment compressor: 1 << 17 = 131072.
NOTE(review): the places that use it are outside this part of the file;
presumably it bounds the input handled per pass -- confirm against callers. */
const kCompressFragmentTwoPassBlockSize uint = 1 << 17
/* hash1 hashes the first "length" bytes of p. It loads 8 bytes little-endian
   (so p must hold at least 8 bytes), shifts out the bytes beyond "length",
   multiplies by kHashMul32 and returns the product shifted right by "shift",
   truncated to 32 bits. */
func hash1(p []byte, shift uint, length uint) uint32 {
	discarded_bits := (8 - length) * 8
	word := binary.LittleEndian.Uint64(p) << discarded_bits
	product := word * uint64(kHashMul32)
	return uint32(product >> shift)
}
/* hashBytesAtOffset computes the same hash as hash1 for the "length" bytes
   that start "offset" bytes into the already loaded little-endian word v.
   REQUIRES: offset <= 8 - length. */
func hashBytesAtOffset(v uint64, offset uint, shift uint, length uint) uint32 {
	assert(offset <= 8-length)
	window := (v >> (8 * offset)) << ((8 - length) * 8)
	product := window * uint64(kHashMul32)
	return uint32(product >> shift)
}
/* isMatch1 reports whether p1 and p2 start with the same bytes. The first
   four bytes are always compared; when length is 4 that is the whole test,
   for any other length bytes 4 and 5 must be equal as well. */
func isMatch1(p1 []byte, p2 []byte, length uint) bool {
	head1 := binary.LittleEndian.Uint32(p1)
	head2 := binary.LittleEndian.Uint32(p2)
	switch {
	case head1 != head2:
		return false
	case length == 4:
		return true
	default:
		return p1[4] == p2[4] && p1[5] == p2[5]
	}
}
/*
buildAndStoreCommandPrefixCode builds a command and a distance prefix code
(64 symbols each) from "histogram", leaves the code lengths in "depth" and the
code values in "bits", and stores both codes into the bit stream.
*/
func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
	var scratch_tree [129]huffmanTree
	var full_depth [numCommandSymbols]byte
	var ordered_bits [64]uint16

	createHuffmanTree(histogram, 64, 15, scratch_tree[:], depth)
	createHuffmanTree(histogram[64:], 64, 14, scratch_tree[:], depth[64:])

	/* The symbols in "depth" are in a different order than in the full
	   alphabet, which saves a few branches in the Emit* functions. To compute
	   the command bits, first bring the depths into alphabet order, assign
	   the code values, and then permute the values back. */
	copy(full_depth[:], depth[24:][:24])
	copy(full_depth[24:], depth[:8])
	copy(full_depth[32:], depth[48:][:8])
	copy(full_depth[40:], depth[8:][:8])
	copy(full_depth[48:], depth[56:][:8])
	copy(full_depth[56:], depth[16:][:8])
	convertBitDepthsToSymbols(full_depth[:], 64, ordered_bits[:])
	copy(bits, ordered_bits[24:][:8])
	copy(bits[8:], ordered_bits[40:][:8])
	copy(bits[16:], ordered_bits[56:][:8])
	copy(bits[24:], ordered_bits[:24])
	copy(bits[48:], ordered_bits[32:][:8])
	copy(bits[56:], ordered_bits[48:][:8])
	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])

	/* Create the bit length array for the full command alphabet. Only the
	   first 64 values were used above, so only those need clearing. */
	for k := range full_depth[:64] {
		full_depth[k] = 0
	}
	copy(full_depth[:], depth[24:][:8])
	copy(full_depth[64:], depth[32:][:8])
	copy(full_depth[128:], depth[40:][:8])
	copy(full_depth[192:], depth[48:][:8])
	copy(full_depth[384:], depth[56:][:8])
	for k := 0; k < 8; k++ {
		full_depth[128+8*k] = depth[k]
		full_depth[256+8*k] = depth[8+k]
		full_depth[448+8*k] = depth[16+k]
	}
	storeHuffmanTree(full_depth[:], numCommandSymbols, scratch_tree[:], storage_ix, storage)

	storeHuffmanTree(depth[64:], 64, scratch_tree[:], storage_ix, storage)
}
/* emitInsertLen encodes an insert length as one command word, stores it in
   the first element of *commands and advances *commands by one element.
   The low 8 bits of the word hold the insert code (0..23), the bits above
   hold the extra-bits value. */
func emitInsertLen(insertlen uint32, commands *[]uint32) {
	var word uint32
	switch {
	case insertlen < 6:
		word = insertlen
	case insertlen < 130:
		rest := insertlen - 2
		nbits := log2FloorNonZero(uint(rest)) - 1
		prefix := rest >> nbits
		code := (nbits << 1) + prefix + 2
		word = code | (rest-(prefix<<nbits))<<8
	case insertlen < 2114:
		rest := insertlen - 66
		nbits := log2FloorNonZero(uint(rest))
		code := nbits + 10
		word = code | (rest-(1<<nbits))<<8
	case insertlen < 6210:
		word = 21 | (insertlen-2114)<<8
	case insertlen < 22594:
		word = 22 | (insertlen-6210)<<8
	default:
		word = 23 | (insertlen-22594)<<8
	}
	(*commands)[0] = word
	*commands = (*commands)[1:]
}
/* emitCopyLen encodes a copy length as one command word, stores it in the
   first element of *commands and advances *commands by one element.
   The low 8 bits of the word hold the copy code, the bits above hold the
   extra-bits value. */
func emitCopyLen(copylen uint, commands *[]uint32) {
	var word uint32
	switch {
	case copylen < 10:
		word = uint32(copylen + 38)
	case copylen < 134:
		rest := copylen - 6
		nbits := uint(log2FloorNonZero(rest) - 1)
		prefix := rest >> nbits
		code := (nbits << 1) + prefix + 44
		word = uint32(code | (rest-(prefix<<nbits))<<8)
	case copylen < 2118:
		rest := copylen - 70
		nbits := uint(log2FloorNonZero(rest))
		code := nbits + 52
		word = uint32(code | (rest-(uint(1)<<nbits))<<8)
	default:
		word = uint32(63 | (copylen-2118)<<8)
	}
	(*commands)[0] = word
	*commands = (*commands)[1:]
}
/* emitCopyLenLastDistance encodes a copy length as a command word and appends
   it to *commands (store in the first element, then advance by one).
   For copylen >= 72 a second word with the value 64 is appended after the
   length word. */
func emitCopyLenLastDistance(copylen uint, commands *[]uint32) {
	var word uint32
	switch {
	case copylen < 12:
		word = uint32(copylen + 20)
	case copylen < 72:
		rest := copylen - 8
		nbits := uint(log2FloorNonZero(rest) - 1)
		prefix := rest >> nbits
		code := (nbits << 1) + prefix + 28
		word = uint32(code | (rest-(prefix<<nbits))<<8)
	case copylen < 136:
		rest := copylen - 8
		code := (rest >> 5) + 54
		word = uint32(code | (rest&31)<<8)
	case copylen < 2120:
		rest := copylen - 72
		nbits := uint(log2FloorNonZero(rest))
		code := nbits + 52
		word = uint32(code | (rest-(uint(1)<<nbits))<<8)
	default:
		word = uint32(63 | (copylen-2120)<<8)
	}
	(*commands)[0] = word
	*commands = (*commands)[1:]

	if copylen >= 72 {
		(*commands)[0] = 64
		*commands = (*commands)[1:]
	}
}
/* emitDistance encodes a backward distance as one command word, stores it in
   the first element of *commands and advances *commands by one element.
   The low 8 bits of the word hold the distance code (starting at 80), the
   bits above hold the extra-bits value. The code is derived from
   distance + 3. */
func emitDistance(distance uint32, commands *[]uint32) {
	shifted := distance + 3
	nbits := log2FloorNonZero(uint(shifted)) - 1
	half := (shifted >> nbits) & 1
	bucket_start := (2 + half) << nbits
	code := 2*(nbits-1) + half + 80
	(*commands)[0] = code | (shifted-bucket_start)<<8
	*commands = (*commands)[1:]
}
/* storeMetaBlockHeader writes a non-last meta-block header: the ISLAST bit
   (always 0), a 2-bit field holding the nibble count minus 4, the length
   minus 1 in 4, 5 or 6 nibbles, and finally the ISUNCOMPRESSED bit.
   REQUIRES: len <= 1 << 24. */
func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
	/* Pick the smallest nibble count that can hold len - 1. */
	var mlen_nibbles uint
	switch {
	case len <= 1<<16:
		mlen_nibbles = 4
	case len <= 1<<20:
		mlen_nibbles = 5
	default:
		mlen_nibbles = 6
	}

	writeBits(1, 0, storage_ix, storage) /* ISLAST */
	writeBits(2, uint64(mlen_nibbles)-4, storage_ix, storage)
	writeBits(mlen_nibbles*4, uint64(len)-1, storage_ix, storage)
	writeSingleBit(is_uncompressed, storage_ix, storage) /* ISUNCOMPRESSED */
}
/* storeMetaBlockHeaderBW writes the same non-last meta-block header as
   storeMetaBlockHeader, but through a bitWriter: the ISLAST bit (always 0),
   a 2-bit field holding the nibble count minus 4, the length minus 1 in
   4, 5 or 6 nibbles, and finally the ISUNCOMPRESSED bit. */
func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) {
	/* Pick the smallest nibble count that can hold len - 1. */
	var mlen_nibbles uint
	switch {
	case len <= 1<<16:
		mlen_nibbles = 4
	case len <= 1<<20:
		mlen_nibbles = 5
	default:
		mlen_nibbles = 6
	}

	bw.writeBits(1, 0) /* ISLAST */
	bw.writeBits(2, uint64(mlen_nibbles)-4)
	bw.writeBits(mlen_nibbles*4, uint64(len)-1)
	bw.writeSingleBit(is_uncompressed) /* ISUNCOMPRESSED */
}
/* createCommands is the first pass of the two-pass compressor: it scans
   input[:block_size] for backward matches of at least "min_match" bytes and
   records the result instead of writing to the bit stream.

   Literal bytes are copied to *literals and command words (insert length,
   copy length, distance; see the emit* helpers) to *commands. Both slices
   are advanced past what was written, so on return they point at the unused
   remainder of the caller's buffers.

   "base_ip_ptr" is the slice against which positions in "table" are
   measured; the offset of "input" relative to it is derived from the slice
   capacities, so both must share a backing array. "table" maps a hash to a
   position relative to base_ip and "table_bits" is the base-2 logarithm of
   its size. "input_size" is the number of input bytes still available from
   the start of this block; it limits how far matching may look ahead.

   The code distinguishes min_match == 4 from all other values; the
   non-4 path updates the table as for 6-byte matches. */
func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) {
	/* "ip" is the input pointer. */
	var ip int = 0
	var shift uint = 64 - table_bits
	var ip_end int = int(block_size)
	/* Position of the start of base_ip_ptr relative to the start of input
	   (zero or negative when base_ip_ptr begins at or before input). */
	var base_ip int = -cap(base_ip_ptr) + cap(input)
	/* "next_emit" is a pointer to the first byte that is not covered by a
	   previous copy. Bytes between "next_emit" and the start of the next copy or
	   the end of the input will be emitted as literal bytes. */
	var next_emit int = 0
	/* -1 means "no previous distance": the candidate derived from it lies
	   after ip and is rejected by the candidate < ip check below. */
	var last_distance int = -1
	const kInputMarginBytes uint = windowGap

	if block_size >= kInputMarginBytes {
		/* For the last block, we need to keep a 16 bytes margin so that we can be
		   sure that all distances are at most window size - 16.
		   For all other blocks, we only need to keep a margin of 5 bytes so that
		   we don't go over the block size with a copy. */
		var len_limit uint = brotli_min_size_t(block_size-min_match, input_size-kInputMarginBytes)
		var ip_limit int = int(len_limit)
		var next_hash uint32
		ip++
		for next_hash = hash1(input[ip:], shift, min_match); ; {
			var skip uint32 = 32
			var next_ip int = ip
			/* Step 1: Scan forward in the input looking for a match of
			   min_match bytes.
			   If we get close to exhausting the input then goto emit_remainder.
			   Heuristic match skipping: If 32 bytes are scanned with no matches
			   found, start looking only at every other byte. If 32 more bytes are
			   scanned, look at every third byte, etc.. When a match is found,
			   immediately go back to looking at every byte. This is a small loss
			   (~5% performance, ~0.1% density) for compressible data due to more
			   bookkeeping, but for non-compressible data (such as JPEG) it's a huge
			   win since the compressor quickly "realizes" the data is incompressible
			   and doesn't bother looking for matches everywhere.
			   The "skip" variable keeps track of how many bytes there are since the
			   last match; dividing it by 32 (ie. right-shifting by five) gives the
			   number of bytes to move ahead for each iteration. */
			var candidate int
			assert(next_emit < ip)
		trawl:
			for {
				var hash uint32 = next_hash
				var bytes_between_hash_lookups uint32 = skip >> 5
				skip++
				ip = next_ip
				assert(hash == hash1(input[ip:], shift, min_match))
				next_ip = int(uint32(ip) + bytes_between_hash_lookups)
				if next_ip > ip_limit {
					goto emit_remainder
				}
				next_hash = hash1(input[next_ip:], shift, min_match)
				/* First try the position at the most recently used distance. */
				candidate = ip - last_distance
				if isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
					if candidate < ip {
						table[hash] = int(ip - base_ip)
						break
					}
				}
				/* Otherwise try the position remembered in the hash table and
				   record the current position there. */
				candidate = base_ip + table[hash]
				assert(candidate >= base_ip)
				assert(candidate < ip)
				table[hash] = int(ip - base_ip)
				if isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
					break
				}
			}
			/* Check copy distance. If candidate is not feasible, continue search.
			   Checking is done outside of hot loop to reduce overhead. */
			if ip-candidate > maxDistance_compress_fragment {
				goto trawl
			}
			/* Step 2: Emit the found match together with the literal bytes from
			   "next_emit", and then see if we can find a next match immediately
			   afterwards. Repeat until we find no match for the input
			   without emitting some literal bytes. */
			{
				var base int = ip
				/* > 0 */
				var matched uint = min_match + findMatchLengthWithLimit(base_ip_ptr[uint(candidate-base_ip)+min_match:], input[uint(ip)+min_match:], uint(ip_end-ip)-min_match)
				var distance int = int(base - candidate)
				/* We have a match at ip, and we need to emit bytes in
				   [next_emit, ip). */
				var insert int = int(base - next_emit)
				ip += int(matched)
				emitInsertLen(uint32(insert), commands)
				copy(*literals, input[next_emit:][:uint(insert)])
				*literals = (*literals)[insert:]
				if distance == last_distance {
					/* Same distance as the previous copy: command word 64. */
					(*commands)[0] = 64
					*commands = (*commands)[1:]
				} else {
					emitDistance(uint32(distance), commands)
					last_distance = distance
				}
				emitCopyLenLastDistance(matched, commands)
				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}
				{
					var input_bytes uint64
					var cur_hash uint32
					/* We could immediately start working at ip now, but to improve
					   compression we first update "table" with the hashes of some
					   positions within the last copy. */
					var prev_hash uint32
					if min_match == 4 {
						input_bytes = binary.LittleEndian.Uint64(input[ip-3:])
						cur_hash = hashBytesAtOffset(input_bytes, 3, shift, min_match)
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 3)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 2)
						/* Offset 2 is the hash of the bytes at ip - 1. Using
						   offset 0 here would re-hash the bytes at ip - 3 and
						   point that hash at ip - 1, leaving ip - 1 unindexed. */
						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 1)
					} else {
						input_bytes = binary.LittleEndian.Uint64(input[ip-5:])
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 5)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 4)
						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 3)
						input_bytes = binary.LittleEndian.Uint64(input[ip-2:])
						cur_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 2)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 1)
					}
					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}
			/* Keep emitting copies for as long as a feasible match starts
			   exactly at ip. */
			for ip-candidate <= maxDistance_compress_fragment && isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
				var base int = ip
				/* We have a match at ip, and no need to emit any
				   literal bytes prior to ip. */
				var matched uint = min_match + findMatchLengthWithLimit(base_ip_ptr[uint(candidate-base_ip)+min_match:], input[uint(ip)+min_match:], uint(ip_end-ip)-min_match)
				ip += int(matched)
				last_distance = int(base - candidate) /* > 0 */
				emitCopyLen(matched, commands)
				emitDistance(uint32(last_distance), commands)
				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}
				{
					var input_bytes uint64
					var cur_hash uint32
					/* We could immediately start working at ip now, but to improve
					   compression we first update "table" with the hashes of some
					   positions within the last copy. */
					var prev_hash uint32
					if min_match == 4 {
						input_bytes = binary.LittleEndian.Uint64(input[ip-3:])
						cur_hash = hashBytesAtOffset(input_bytes, 3, shift, min_match)
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 3)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 2)
						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 1)
					} else {
						input_bytes = binary.LittleEndian.Uint64(input[ip-5:])
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 5)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 4)
						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 3)
						input_bytes = binary.LittleEndian.Uint64(input[ip-2:])
						cur_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 2)
						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
						table[prev_hash] = int(ip - base_ip - 1)
					}
					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}
			ip++
			next_hash = hash1(input[ip:], shift, min_match)
		}
	}
emit_remainder:
	assert(next_emit <= ip_end)
	/* Emit the remaining bytes as literals. */
	if next_emit < ip_end {
		var insert uint32 = uint32(ip_end - next_emit)
		emitInsertLen(insert, commands)
		copy(*literals, input[next_emit:][:insert])
		*literals = (*literals)[insert:]
	}
}
/* Number of extra bits written after each command symbol by storeCommands.
The table is indexed by the command code, i.e. the low 8 bits of a command
word (always < 128); the extra-bit value itself comes from the remaining
upper bits of the same word. */
var storeCommands_kNumExtraBits = [128]uint32{
0,
0,
0,
0,
0,
0,
1,
1,
2,
2,
3,
3,
4,
4,
5,
5,
6,
7,
8,
9,
10,
12,
14,
24,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
2,
2,
3,
3,
4,
4,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
2,
2,
3,
3,
4,
4,
5,
5,
6,
7,
8,
9,
10,
24,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
2,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13,
14,
14,
15,
15,
16,
16,
17,
17,
18,
18,
19,
19,
20,
20,
21,
21,
22,
22,
23,
23,
24,
24,
}
/* Base insert length for command codes 0..23. storeCommands adds the
command's extra-bit value to this base to obtain the number of literals
that follow the command in the output. */
var storeCommands_kInsertOffset = [24]uint32{
0,
1,
2,
3,
4,
5,
6,
8,
10,
14,
18,
26,
34,
50,
66,
98,
130,
194,
322,
578,
1090,
2114,
6210,
22594,
}
/* storeCommands writes the prefix codes for the literals and the commands of
   one meta-block, followed by the commands themselves, each insert command
   being immediately followed by the literals it covers.

   literals[0..num_literals) are the literal bytes and commands[0..num_commands)
   the command words; a command word carries its code in the low 8 bits and
   its extra-bit value in the bits above. Output goes to storage at bit
   position *storage_ix, which is advanced by the writer helpers. */
func storeCommands(literals []byte, num_literals uint, commands []uint32, num_commands uint, storage_ix *uint, storage []byte) {
	var literalDepths [256]byte
	var literalCodes [256]uint16
	var literalHisto [256]uint32
	var commandDepths [128]byte
	var commandCodes [128]uint16
	var commandHisto [128]uint32

	/* Literal statistics and literal prefix code. */
	for n := uint(0); n < num_literals; n++ {
		literalHisto[literals[n]]++
	}
	buildAndStoreHuffmanTreeFast(literalHisto[:], num_literals, 8 /* max_bits */, literalDepths[:], literalCodes[:], storage_ix, storage)

	/* Command statistics and command prefix code. */
	for n := uint(0); n < num_commands; n++ {
		symbol := commands[n] & 0xFF
		assert(symbol < 128)
		commandHisto[symbol]++
	}
	/* These four symbols always get one extra count, so none of them can end
	   up with a zero frequency.
	   NOTE(review): presumably buildAndStoreCommandPrefixCode needs them to be
	   present - confirm against that helper. */
	for _, symbol := range [...]int{1, 2, 64, 84} {
		commandHisto[symbol]++
	}
	buildAndStoreCommandPrefixCode(commandHisto[:], commandDepths[:], commandCodes[:], storage_ix, storage)

	/* Emit every command, its extra bits, and (for insert codes) its literals. */
	for n := uint(0); n < num_commands; n++ {
		word := commands[n]
		symbol := word & 0xFF
		payload := word >> 8
		assert(symbol < 128)
		writeBits(uint(commandDepths[symbol]), uint64(commandCodes[symbol]), storage_ix, storage)
		writeBits(uint(storeCommands_kNumExtraBits[symbol]), uint64(payload), storage_ix, storage)
		if symbol >= 24 {
			continue
		}
		/* Codes below 24 are inserts: consume that many literals from the front. */
		for remaining := storeCommands_kInsertOffset[symbol] + payload; remaining > 0; remaining-- {
			lit := literals[0]
			writeBits(uint(literalDepths[lit]), uint64(literalCodes[lit]), storage_ix, storage)
			literals = literals[1:]
		}
	}
}
/* Acceptable loss for uncompressible speedup is 2% */
const minRatio = 0.98

/* shouldCompress looks at every sampleRate-th input byte. */
const sampleRate = 43

/* shouldCompress reports whether a block of input_size bytes, of which
   num_literals were left as literals, should be stored compressed.

   It answers true right away when the literals make up less than minRatio of
   the block. Otherwise it builds a histogram from a sample of the input and
   answers true only if the entropy of that sample stays below the
   proportionally scaled bit budget. */
func shouldCompress(input []byte, input_size uint, num_literals uint) bool {
	total := float64(input_size)
	if float64(num_literals) < minRatio*total {
		return true
	}

	var sampled [256]uint32
	for pos := uint(0); pos < input_size; pos += sampleRate {
		sampled[input[pos]]++
	}
	budget := total * 8 * minRatio / sampleRate
	return bitsEntropy(sampled[:], 256) < budget
}
/* rewindBitPosition sets *storage_ix to new_storage_ix and, in the byte that
   contains that bit position, keeps only the bits below it (the bits at and
   above the position are cleared). Other bytes of storage are not touched. */
func rewindBitPosition(new_storage_ix uint, storage_ix *uint, storage []byte) {
	byteIndex := new_storage_ix >> 3
	keep := byte((uint(1) << (new_storage_ix & 7)) - 1)
	storage[byteIndex] &= keep
	*storage_ix = new_storage_ix
}
/* emitUncompressedMetaBlock stores the first input_size bytes of input
   verbatim: it writes a meta-block header flagged as uncompressed, rounds the
   bit position up to a byte boundary, copies the bytes, and zeroes the byte
   at the new end position. */
func emitUncompressedMetaBlock(input []byte, input_size uint, storage_ix *uint, storage []byte) {
	storeMetaBlockHeader(input_size, true, storage_ix, storage)

	/* Round up to the next whole byte before copying raw data. */
	aligned := (*storage_ix + 7) &^ 7
	*storage_ix = aligned
	copy(storage[aligned>>3:], input[:input_size])

	end := aligned + input_size<<3
	*storage_ix = end
	storage[end>>3] = 0
}
/* compressFragmentTwoPassImpl processes input in blocks of at most
   kCompressFragmentTwoPassBlockSize bytes. For each block it first generates
   commands and literals into the scratch buffers, then either stores them as
   a compressed meta-block or, when shouldCompress says it is not worthwhile,
   stores the block uncompressed.

   is_last is accepted for signature compatibility but is not read here. */
func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_bits uint, min_match uint, storage_ix *uint, storage []byte) {
	/* Positions and distances are computed relative to the start of the
	   first block. */
	base_ip := input

	for input_size > 0 {
		block_size := brotli_min_size_t(input_size, kCompressFragmentTwoPassBlockSize)
		literals := literal_buf
		commands := command_buf
		createCommands(input, block_size, input_size, base_ip, table, table_bits, min_match, &literals, &commands)

		/* The callee moves the slice heads forward as it fills the buffers, so
		   the drop in capacity is the number of items produced. */
		num_literals := uint(cap(literal_buf) - cap(literals))

		if !shouldCompress(input, block_size, num_literals) {
			/* Since we did not find many backward references and the entropy of
			   the data is close to 8 bits, we can simply emit an uncompressed block.
			   This makes compression speed of uncompressible data about 3x faster. */
			emitUncompressedMetaBlock(input, block_size, storage_ix, storage)
		} else {
			num_commands := uint(cap(command_buf) - cap(commands))
			storeMetaBlockHeader(block_size, false, storage_ix, storage)
			/* No block splits, no contexts. */
			writeBits(13, 0, storage_ix, storage)
			storeCommands(literal_buf, num_literals, command_buf, num_commands, storage_ix, storage)
		}

		input = input[block_size:]
		input_size -= block_size
	}
}
/*
compressFragmentTwoPass compresses "input" into the "storage" buffer as one or
more complete meta-blocks and updates the "*storage_ix" bit position.
If "is_last" is true, it additionally emits an empty last meta-block and pads
the bit position to a byte boundary.

The match length used by the command generator is 4 when the table has at
most 2^15 entries and 6 otherwise. If the compressed output ends up larger
than a single uncompressed meta-block, the output is rewound and the input is
stored uncompressed instead.

REQUIRES: "input_size" is greater than zero, or "is_last" is true.
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
REQUIRES: "command_buf" and "literal_buf" point to at least
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two
OUTPUT: maximal copy distance <= |input_size|
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18)
*/
func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) {
	start_ix := *storage_ix
	table_bits := uint(log2FloorNonZero(table_size))

	min_match := uint(6)
	if table_bits <= 15 {
		min_match = 4
	}

	compressFragmentTwoPassImpl(input, input_size, is_last, command_buf, literal_buf, table, table_bits, min_match, storage_ix, storage)

	/* If output is larger than single uncompressed block, rewrite it. */
	if written := *storage_ix - start_ix; written > 31+(input_size<<3) {
		rewindBitPosition(start_ix, storage_ix, storage)
		emitUncompressedMetaBlock(input, input_size, storage_ix, storage)
	}

	if !is_last {
		return
	}
	writeBits(1, 1, storage_ix, storage) /* islast */
	writeBits(1, 1, storage_ix, storage) /* isempty */
	*storage_ix = (*storage_ix + 7) &^ 7
}
+77
View File
@@ -0,0 +1,77 @@
package brotli
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Specification: 7.3. Encoding of the context map */
const contextMapMaxRle = 16

/* Specification: 2. Compressed representation overview */
const maxNumberOfBlockTypes = 256

/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
const (
	numLiteralSymbols  = 256
	numCommandSymbols  = 704
	numBlockLenSymbols = 26

	/* Derived alphabet sizes. */
	maxContextMapSymbols = maxNumberOfBlockTypes + contextMapMaxRle
	maxBlockTypeSymbols  = maxNumberOfBlockTypes + 2
)

/* Specification: 3.5. Complex prefix codes */
const (
	repeatPreviousCodeLength = 16
	repeatZeroCodeLength     = 17
	codeLengthCodes          = repeatZeroCodeLength + 1

	/* "code length of 8 is repeated" */
	initialRepeatedCodeLength = 8
)

/* "Large Window Brotli" */
const (
	largeMaxDistanceBits = 62
	largeMinWbits        = 10
	largeMaxWbits        = 30
)
/* Specification: 4. Encoding of distances */
const (
	numDistanceShortCodes = 16
	maxNpostfix           = 3
	maxNdirect            = 120
	maxDistanceBits       = 24
)

/* distanceAlphabetSize returns the number of distance symbols for the given
   parameters: the short codes, plus NDIRECT direct codes, plus
   MAXNBITS << (NPOSTFIX + 1) further codes. */
func distanceAlphabetSize(NPOSTFIX uint, NDIRECT uint, MAXNBITS uint) uint {
	postfixCodes := MAXNBITS << (NPOSTFIX + 1)
	return numDistanceShortCodes + NDIRECT + postfixCodes
}

/* numDistanceSymbols == 1128, which is the value distanceAlphabetSize yields
   for NPOSTFIX = 3, NDIRECT = 120 and MAXNBITS = 62. */
const numDistanceSymbols = 1128

const (
	maxDistance        = 0x3FFFFFC
	maxAllowedDistance = 0x7FFFFFFC
)
/* 7.1. Context modes and context ID lookup for literals */
/* "context IDs for literals are in the range of 0..63" */
const literalContextBits = 6

/* 7.2. Context ID for distances */
const distanceContextBits = 2

/* 9.1. Format of the Stream Header */
/* Number of slack bytes for window size. Don't confuse
   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
const windowGap = 16

/* maxBackwardLimit returns 2^W minus the windowGap slack bytes. */
func maxBackwardLimit(W uint) uint {
	window := uint(1) << W
	return window - windowGap
}
+2176
View File
File diff suppressed because it is too large Load Diff
+2581
View File
File diff suppressed because it is too large Load Diff
+122890
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+1220
View File
File diff suppressed because it is too large Load Diff
+168
View File
@@ -0,0 +1,168 @@
package brotli
import "github.com/andybalholm/brotli/matchfinder"
// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
type Encoder struct {
// wroteHeader is set once Encode has written the 4-bit stream header, so
// that later calls do not write it again; Reset clears it.
wroteHeader bool
// bw is the bit writer all output goes through; Encode points its dst at
// the caller's buffer on every call.
bw bitWriter
// distCache holds the distance code chosen for each match during Encode's
// first pass so the second pass can emit it without recomputing. It is
// grown on demand and kept across calls.
distCache []distanceCode
}
// Reset returns the Encoder to its initial state: the stream header will be
// written again by the next Encode, and the bit writer is replaced with a
// zero value. The distCache scratch slice is intentionally left in place.
func (e *Encoder) Reset() {
	e.wroteHeader, e.bw = false, bitWriter{}
}
// Encode writes src as a single Brotli meta-block, using matches to describe
// how src splits into unmatched (literal) runs and copies, and returns the
// bit writer's destination slice. dst is handed to the bit writer as its
// output buffer (presumably appended to; confirm against bitWriter).
//
// The work is done in two passes over matches: the first builds the literal,
// command and distance histograms and caches each match's distance code; the
// second emits the symbols using prefix codes built from those histograms.
// The 4-bit stream header is written only by the first call since the
// Encoder was created or Reset. When lastBlock is true an empty last
// meta-block is added and the output is padded to a byte boundary.
func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
e.bw.dst = dst
if !e.wroteHeader {
// Stream header: written once per stream.
e.bw.writeBits(4, 15)
e.wroteHeader = true
}
var literalHisto [256]uint32
var commandHisto [704]uint32
var distanceHisto [64]uint32
literalCount := 0
commandCount := 0
distanceCount := 0
// Make sure there is one cache slot per match.
if len(e.distCache) < len(matches) {
e.distCache = make([]distanceCode, len(matches))
}
// first pass: build the histograms
pos := 0
// d is the ring buffer of the last 4 distances.
d := [4]int{-10, -10, -10, -10}
for i, m := range matches {
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
literalHisto[c]++
}
literalCount += m.Unmatched
}
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
commandHisto[command]++
commandCount++
// NOTE(review): only commands >= 128 get an explicit distance code here;
// presumably lower command values imply reuse of the last distance -
// confirm against combineLengthCodes.
if command >= 128 && m.Length != 0 {
var distCode distanceCode
// Prefer the short codes that refer to recent distances (d[3] is the
// most recent one) before falling back to an explicit distance code.
switch m.Distance {
case d[3]:
distCode.code = 0
case d[2]:
distCode.code = 1
case d[1]:
distCode.code = 2
case d[0]:
distCode.code = 3
case d[3] - 1:
distCode.code = 4
case d[3] + 1:
distCode.code = 5
case d[3] - 2:
distCode.code = 6
case d[3] + 2:
distCode.code = 7
case d[3] - 3:
distCode.code = 8
case d[3] + 3:
distCode.code = 9
// In my testing, codes 10-15 actually reduced the compression ratio.
default:
distCode = getDistanceCode(m.Distance)
}
e.distCache[i] = distCode
distanceHisto[distCode.code]++
distanceCount++
// Code 0 repeats the most recent distance, so the ring buffer only
// advances for the other codes.
if distCode.code != 0 {
d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
}
}
pos += m.Unmatched + m.Length
}
storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
// 13 zero bits follow the meta-block header before the prefix codes.
e.bw.writeBits(13, 0)
// Build and store one prefix code per alphabet: literals, commands, distances.
var literalDepths [256]byte
var literalBits [256]uint16
buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
var commandDepths [704]byte
var commandBits [704]uint16
buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
var distanceDepths [64]byte
var distanceBits [64]uint16
buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
// second pass: emit command, extra bits, literals and distance for each match
pos = 0
for i, m := range matches {
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
if kInsExtra[insertCode] > 0 {
e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
}
if kCopyExtra[copyCode] > 0 {
e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
}
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
}
}
// Same condition as in the first pass, so distCache[i] is valid here.
if command >= 128 && m.Length != 0 {
distCode := e.distCache[i]
e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
if distCode.nExtra > 0 {
e.bw.writeBits(distCode.nExtra, distCode.extraBits)
}
}
pos += m.Unmatched + m.Length
}
if lastBlock {
e.bw.writeBits(2, 3) // islast + isempty
e.bw.jumpToByteBoundary()
}
return e.bw.dst
}
// distanceCode is a distance symbol together with the extra bits that
// follow it in the stream.
type distanceCode struct {
// code is the distance symbol; it indexes the distance histogram and
// prefix code in Encode.
code int
// nExtra is the number of extra bits to write after the symbol (0 for none).
nExtra uint
// extraBits is the value written in those nExtra bits.
extraBits uint64
}
// getDistanceCode computes the explicit (non-short) distance code for
// distance: the symbol, the number of extra bits, and the extra-bit value.
// The symbols it produces start at 16, after the short codes.
func getDistanceCode(distance int) distanceCode {
	shifted := distance + 3
	extraBitCount := log2FloorNonZero(uint(shifted)) - 1
	// half selects the lower or upper half of the power-of-two bucket.
	half := (shifted >> extraBitCount) & 1
	base := (2 + half) << extraBitCount
	return distanceCode{
		code:      int(2*(extraBitCount-1)) + half + 16,
		nExtra:    uint(extraBitCount),
		extraBits: uint64(shifted - base),
	}
}
+22
View File
@@ -0,0 +1,22 @@
package brotli
/* Dictionary data (words and transforms) for 1 possible context.
All fields are filled in by initEncoderDictionary from the package's static
dictionary tables. */
type encoderDictionary struct {
/* The dictionary returned by getDictionary(). */
words *dictionary
/* Copied from kCutoffTransformsCount and kCutoffTransforms respectively. */
cutoffTransformsCount uint32
cutoffTransforms uint64
/* Views over kStaticDictionaryHash, kStaticDictionaryBuckets and
kStaticDictionaryWords respectively. */
hash_table []uint16
buckets []uint16
dict_words []dictWord
}
/* initEncoderDictionary points every field of dict at the package's built-in
   static dictionary data. */
func initEncoderDictionary(dict *encoderDictionary) {
	*dict = encoderDictionary{
		words:                 getDictionary(),
		hash_table:            kStaticDictionaryHash[:],
		buckets:               kStaticDictionaryBuckets[:],
		dict_words:            kStaticDictionaryWords[:],
		cutoffTransformsCount: kCutoffTransformsCount,
		cutoffTransforms:      kCutoffTransforms,
	}
}
+592
View File
@@ -0,0 +1,592 @@
package brotli
import "math"
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Entropy encoding (Huffman) utilities. */
/* A node of a Huffman tree.
   For an inner node index_left_ and index_right_or_value_ are the indices of
   its children; a leaf has index_left_ < 0 and keeps its symbol value in
   index_right_or_value_. */
type huffmanTree struct {
	total_count_          uint32
	index_left_           int16
	index_right_or_value_ int16
}

/* initHuffmanTree overwrites all three fields of *self. */
func initHuffmanTree(self *huffmanTree, count uint32, left int16, right int16) {
	*self = huffmanTree{
		total_count_:          count,
		index_left_:           left,
		index_right_or_value_: right,
	}
}
/* Input size optimized Shell sort. */

/* huffmanTreeComparator reports whether its first argument must be ordered
   before its second. */
type huffmanTreeComparator func(huffmanTree, huffmanTree) bool

/* Gap sequence for the Shell sort, largest gap first. */
var sortHuffmanTreeItems_gaps = []uint{132, 57, 23, 10, 4, 1}

/* sortHuffmanTreeItems sorts items[0..n) in place according to comparator.
   Fewer than 13 items are handled by a plain insertion sort; otherwise a
   Shell sort is used, skipping the two largest gaps when n < 57. */
func sortHuffmanTreeItems(items []huffmanTree, n uint, comparator huffmanTreeComparator) {
	if n < 13 {
		/* Insertion sort. */
		for i := uint(1); i < n; i++ {
			cur := items[i]
			slot := i
			for slot > 0 && comparator(cur, items[slot-1]) {
				items[slot] = items[slot-1]
				slot--
			}
			items[slot] = cur
		}
		return
	}

	gaps := sortHuffmanTreeItems_gaps
	if n < 57 {
		gaps = gaps[2:]
	}
	for _, gap := range gaps {
		for i := gap; i < n; i++ {
			cur := items[i]
			slot := i
			for slot >= gap && comparator(cur, items[slot-gap]) {
				items[slot] = items[slot-gap]
				slot -= gap
			}
			items[slot] = cur
		}
	}
}
/* setDepth walks the tree rooted at pool[p0] without recursion and stores the
depth of every leaf in depth[], indexed by the leaf's symbol value.
Returns true if assignment of depths succeeded, or false as soon as a node
deeper than max_depth is reached (depth[] may then be partially written).
max_depth must not exceed 15. */
func setDepth(p0 int, pool []huffmanTree, depth []byte, max_depth int) bool {
/* stack[level] is the right child still to be visited at that level, or -1
when there is none; 16 slots cover levels 0..15. */
var stack [16]int
var level int = 0
var p int = p0
assert(max_depth <= 15)
stack[0] = -1
for {
if pool[p].index_left_ >= 0 {
/* Inner node: remember the right child and descend into the left one. */
level++
if level > max_depth {
return false
}
stack[level] = int(pool[p].index_right_or_value_)
p = int(pool[p].index_left_)
continue
} else {
/* Leaf: index_right_or_value_ is the symbol. */
depth[pool[p].index_right_or_value_] = byte(level)
}
/* Climb back up to the nearest level that still has a pending right child. */
for level >= 0 && stack[level] == -1 {
level--
}
if level < 0 {
return true
}
p = stack[level]
stack[level] = -1
}
}
/* Comparator used to sort the root nodes, least popular first. Nodes with
   equal counts are ordered by descending index_right_or_value_. */
func sortHuffmanTree(v0 huffmanTree, v1 huffmanTree) bool {
	if v0.total_count_ == v1.total_count_ {
		return v0.index_right_or_value_ > v1.index_right_or_value_
	}
	return v0.total_count_ < v1.total_count_
}
/* This function will create a Huffman tree.
The catch here is that the tree cannot be arbitrarily deep.
Brotli specifies a maximum depth of 15 bits for "code trees"
and 7 bits for "code length code trees."
count_limit is the value that is to be faked as the minimum value
and this minimum value is raised until the tree matches the
maximum length requirement.
This algorithm is not of excellent performance for very long data blocks,
especially when population counts are longer than 2**tree_limit, but
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding

Parameters:
data[0..length) are the population counts per symbol.
tree_limit is the maximum allowed code length.
tree is scratch space; up to 2n+1 entries are used, where n is the number
of non-zero counts.
depth receives the resulting code length of every symbol with a non-zero
count; entries for other symbols are not written here.

NOTE(review): a histogram with no non-zero count (n == 0) is not handled
here - the merge loop counter would wrap around. Callers presumably
guarantee at least one non-zero count; confirm. */
func createHuffmanTree(data []uint32, length uint, tree_limit int, tree []huffmanTree, depth []byte) {
var count_limit uint32
var sentinel huffmanTree
/* The sentinel has the maximum possible count so it is never picked as the
smaller of two candidates. */
initHuffmanTree(&sentinel, math.MaxUint32, -1, -1)
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
would need this, we would be better off with the Katajainen algorithm. */
for count_limit = 1; ; count_limit *= 2 {
var n uint = 0
var i uint
var j uint
var k uint
/* Collect the leaves, highest symbol first, clamping every count to at
least count_limit. */
for i = length; i != 0; {
i--
if data[i] != 0 {
var count uint32 = brotli_max_uint32_t(data[i], count_limit)
initHuffmanTree(&tree[n], count, -1, int16(i))
n++
}
}
if n == 1 {
depth[tree[0].index_right_or_value_] = 1 /* Only one element. */
break
}
sortHuffmanTreeItems(tree, n, huffmanTreeComparator(sortHuffmanTree))
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
tree[n] = sentinel
tree[n+1] = sentinel
i = 0 /* Points to the next leaf node. */
j = n + 1 /* Points to the next non-leaf node. */
/* n leaves need n-1 merges; each one joins the two smallest remaining nodes. */
for k = n - 1; k != 0; k-- {
var left uint
var right uint
if tree[i].total_count_ <= tree[j].total_count_ {
left = i
i++
} else {
left = j
j++
}
if tree[i].total_count_ <= tree[j].total_count_ {
right = i
i++
} else {
right = j
j++
}
{
/* The sentinel node becomes the parent node. */
var j_end uint = 2*n - k
tree[j_end].total_count_ = tree[left].total_count_ + tree[right].total_count_
tree[j_end].index_left_ = int16(left)
tree[j_end].index_right_or_value_ = int16(right)
/* Add back the last sentinel node. */
tree[j_end+1] = sentinel
}
}
/* The root is the last parent created, at index 2n-1. */
if setDepth(int(2*n-1), tree[0:], depth, tree_limit) {
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
break
}
}
}
/* reverse reverses the elements of v[start:end] in place.
   An empty or inverted range (end <= start) is a no-op. The explicit guard
   matters because end is unsigned: decrementing an end of 0 would wrap
   around to the largest uint and index far outside v. */
func reverse(v []byte, start uint, end uint) {
	if end <= start {
		return
	}
	for i, j := start, end-1; i < j; i, j = i+1, j-1 {
		v[i], v[j] = v[j], v[i]
	}
}
/* writeHuffmanTreeRepetitions appends the encoding of "repetitions"
   consecutive occurrences of the non-zero code length "value" to tree and
   extra_bits_data, advancing *tree_size.

   Short runs are written out literally; runs of 3 or more (after the
   adjustments below) are written as a sequence of repeatPreviousCodeLength
   symbols, each carrying a 2-bit extra value. That sequence is generated
   least-significant group first and then reversed in place. */
func writeHuffmanTreeRepetitions(previous_value byte, value byte, repetitions uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
	assert(repetitions > 0)

	/* put appends one (symbol, extra bits) pair. */
	put := func(symbol byte, extra byte) {
		tree[*tree_size] = symbol
		extra_bits_data[*tree_size] = extra
		(*tree_size)++
	}

	/* The repeat symbol repeats the previous code length, so a new value has
	   to be written once explicitly first. */
	if previous_value != value {
		put(value, 0)
		repetitions--
	}
	/* A run of exactly 7 gets one occurrence written literally first. */
	if repetitions == 7 {
		put(value, 0)
		repetitions--
	}

	if repetitions < 3 {
		for ; repetitions > 0; repetitions-- {
			put(value, 0)
		}
		return
	}

	first := *tree_size
	remaining := repetitions - 3
	for {
		put(repeatPreviousCodeLength, byte(remaining&0x3))
		remaining >>= 2
		if remaining == 0 {
			break
		}
		remaining--
	}
	reverse(tree, first, *tree_size)
	reverse(extra_bits_data, first, *tree_size)
}
/* writeHuffmanTreeRepetitionsZeros appends the encoding of "repetitions"
   consecutive zero code lengths to tree and extra_bits_data, advancing
   *tree_size.

   Runs shorter than 3 are written as plain zeros; longer runs become a
   sequence of repeatZeroCodeLength symbols, each carrying a 3-bit extra
   value, generated least-significant group first and then reversed. */
func writeHuffmanTreeRepetitionsZeros(repetitions uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
	/* put appends one (symbol, extra bits) pair. */
	put := func(symbol byte, extra byte) {
		tree[*tree_size] = symbol
		extra_bits_data[*tree_size] = extra
		(*tree_size)++
	}

	/* A run of exactly 11 gets one zero written literally first. */
	if repetitions == 11 {
		put(0, 0)
		repetitions--
	}

	if repetitions < 3 {
		for ; repetitions > 0; repetitions-- {
			put(0, 0)
		}
		return
	}

	first := *tree_size
	remaining := repetitions - 3
	for {
		put(repeatZeroCodeLength, byte(remaining&0x7))
		remaining >>= 3
		if remaining == 0 {
			break
		}
		remaining--
	}
	reverse(tree, first, *tree_size)
	reverse(extra_bits_data, first, *tree_size)
}
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its RLE-part will be more
likely to compress this data more efficiently.
length contains the size of the histogram.
counts contains the population counts; it is modified in place.
good_for_rle is a scratch buffer of at least length size.

The function returns without touching counts when the histogram has fewer
than 16 non-zero entries, and may return after only filling isolated
single-zero gaps when it has fewer than 28. */
func optimizeHuffmanCountsForRLE(length uint, counts []uint32, good_for_rle []byte) {
var nonzero_count uint = 0
var stride uint
var limit uint
var sum uint
/* Half-width of the window around limit, in the same 24.8 fixed point
scale as limit itself. */
var streak_limit uint = 1240
var i uint
/* Let's make the Huffman code more compatible with RLE encoding. */
for i = 0; i < length; i++ {
if counts[i] != 0 {
nonzero_count++
}
}
if nonzero_count < 16 {
return
}
for length != 0 && counts[length-1] == 0 {
length--
}
if length == 0 {
return /* All zeros. */
}
/* Now counts[0..length - 1] does not have trailing zeros. */
{
var nonzeros uint = 0
var smallest_nonzero uint32 = 1 << 30
for i = 0; i < length; i++ {
if counts[i] != 0 {
nonzeros++
if smallest_nonzero > counts[i] {
smallest_nonzero = counts[i]
}
}
}
if nonzeros < 5 {
/* Small histogram will model it well. */
return
}
if smallest_nonzero < 4 {
var zeros uint = length - nonzeros
if zeros < 6 {
/* Fill in zeros that sit alone between two non-zero counts. */
for i = 1; i < length-1; i++ {
if counts[i-1] != 0 && counts[i] == 0 && counts[i+1] != 0 {
counts[i] = 1
}
}
}
}
if nonzeros < 28 {
return
}
}
/* 2) Let's mark all population counts that already can be encoded
with an RLE code. */
for i := 0; i < int(length); i++ {
good_for_rle[i] = 0
}
{
var symbol uint32 = counts[0]
/* Let's not spoil any of the existing good RLE codes.
Mark any run of equal 0's of length 5 or more as good_for_rle.
Mark any run of equal non-0's of length 7 or more as good_for_rle. */
var step uint = 0
/* i runs one past the end so that the final run is flushed too. */
for i = 0; i <= length; i++ {
if i == length || counts[i] != symbol {
if (symbol == 0 && step >= 5) || (symbol != 0 && step >= 7) {
var k uint
for k = 0; k < step; k++ {
good_for_rle[i-k-1] = 1
}
}
step = 1
if i != length {
symbol = counts[i]
}
} else {
step++
}
}
}
/* 3) Let's replace those population counts that lead to more RLE codes.
Math here is in 24.8 fixed point representation. */
stride = 0
limit = uint(256*(counts[0]+counts[1]+counts[2])/3 + 420)
sum = 0
for i = 0; i <= length; i++ {
/* The last clause is an unsigned range check: it is true when
256*counts[i] lies outside [limit - streak_limit, limit + streak_limit). */
if i == length || good_for_rle[i] != 0 || (i != 0 && good_for_rle[i-1] != 0) || (256*counts[i]-uint32(limit)+uint32(streak_limit)) >= uint32(2*streak_limit) {
if stride >= 4 || (stride >= 3 && sum == 0) {
var k uint
/* Rounded average of the stride. */
var count uint = (sum + stride/2) / stride
/* The stride must end, collapse what we have, if we have enough (4). */
if count == 0 {
count = 1
}
if sum == 0 {
/* Don't make an all zeros stride to be upgraded to ones. */
count = 0
}
for k = 0; k < stride; k++ {
/* We don't want to change value at counts[i],
that is already belonging to the next stride. Thus - 1. */
counts[i-k-1] = uint32(count)
}
}
stride = 0
sum = 0
if i < length-2 {
/* All interesting strides have a count of at least 4, */
/* at least when non-zeros. */
limit = uint(256*(counts[i]+counts[i+1]+counts[i+2])/3 + 420)
} else if i < length {
limit = uint(256 * counts[i])
} else {
limit = 0
}
}
stride++
if i != length {
sum += uint(counts[i])
if stride >= 4 {
/* Track the running average of the current stride. */
limit = (256*sum + stride/2) / stride
}
if stride == 4 {
limit += 120
}
}
}
}
/* decideOverRLEUse scans depth[0..length) run by run and decides, separately
   for zero and non-zero code lengths, whether run-length coding should be
   used. Only runs of at least 3 zeros, or at least 4 equal non-zero values,
   are counted. A flag is set when the total length of the counted runs
   exceeds twice (number of counted runs + 1). */
func decideOverRLEUse(depth []byte, length uint, use_rle_for_non_zero *bool, use_rle_for_zero *bool) {
	var zeroTotal, nonZeroTotal uint
	/* The run counters start at 1, not 0. */
	zeroRuns, nonZeroRuns := uint(1), uint(1)

	for pos := uint(0); pos < length; {
		symbol := depth[pos]
		end := pos + 1
		for end < length && depth[end] == symbol {
			end++
		}
		run := end - pos

		switch {
		case symbol == 0 && run >= 3:
			zeroTotal += run
			zeroRuns++
		case symbol != 0 && run >= 4:
			nonZeroTotal += run
			nonZeroRuns++
		}
		pos = end
	}

	*use_rle_for_non_zero = nonZeroTotal > nonZeroRuns*2
	*use_rle_for_zero = zeroTotal > zeroRuns*2
}
/* Write a Huffman tree from bit depths into the bit-stream representation
of a Huffman tree. The generated Huffman tree is to be compressed once
more using a Huffman tree */
func writeHuffmanTree(depth []byte, length uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
var previous_value byte = initialRepeatedCodeLength
var i uint
var use_rle_for_non_zero bool = false
var use_rle_for_zero bool = false
var new_length uint = length
/* Throw away trailing zeros. */
for i = 0; i < length; i++ {
if depth[length-i-1] == 0 {
new_length--
} else {
break
}
}
/* First gather statistics on if it is a good idea to do RLE. */
if length > 50 {
/* Find RLE coding for longer codes.
Shorter codes seem not to benefit from RLE. */
decideOverRLEUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero)
}
/* Actual RLE coding. */
for i = 0; i < new_length; {
var value byte = depth[i]
var reps uint = 1
if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
var k uint
for k = i + 1; k < new_length && depth[k] == value; k++ {
reps++
}
}
if value == 0 {
writeHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data)
} else {
writeHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data)
previous_value = value
}
i += reps
}
}
/* reverseBits_kLut[x] is the 4-bit value x with its bit order reversed. */
var reverseBits_kLut = [16]uint{
	0x00, 0x08, 0x04, 0x0C,
	0x02, 0x0A, 0x06, 0x0E,
	0x01, 0x09, 0x05, 0x0D,
	0x03, 0x0B, 0x07, 0x0F,
}

/* reverseBits returns the low num_bits bits of "bits" in reversed order.
   The value is reversed a nibble at a time through the lookup table; since
   that reverses a whole number of nibbles, the result is finally shifted
   right by the number of padding bits, (-num_bits) mod 4. */
func reverseBits(num_bits uint, bits uint16) uint16 {
	reversed := reverseBits_kLut[bits&0x0F]
	for consumed := uint(4); consumed < num_bits; consumed += 4 {
		bits >>= 4
		reversed = reversed<<4 | reverseBits_kLut[bits&0x0F]
	}
	return uint16(reversed >> ((0 - num_bits) & 0x03))
}
/* 0..15 are values for bits */
const maxHuffmanBits = 16

/* Get the actual bit values for a tree of bit depths.

   depth[0..len) are the code lengths and bits[0..len) receives the code of
   every symbol whose depth is non-zero, stored bit-reversed. Entries of bits
   for symbols with depth 0 are left untouched.

   In Brotli, all bit depths are [1..15]; a bit depth of 0 means that the
   symbol does not exist. */
func convertBitDepthsToSymbols(depth []byte, len uint, bits []uint16) {
	/* How many symbols use each code length. */
	var lengthCounts [maxHuffmanBits]uint16
	for i := uint(0); i < len; i++ {
		lengthCounts[depth[i]]++
	}
	lengthCounts[0] = 0

	/* First code of each length. */
	var nextCode [maxHuffmanBits]uint16
	code := 0
	for n := 1; n < maxHuffmanBits; n++ {
		code = (code + int(lengthCounts[n-1])) << 1
		nextCode[n] = uint16(code)
	}

	/* Hand out consecutive codes per length, in symbol order. */
	for i := uint(0); i < len; i++ {
		d := depth[i]
		if d == 0 {
			continue
		}
		bits[i] = reverseBits(uint(d), nextCode[d])
		nextCode[d]++
	}
}
File diff suppressed because it is too large Load Diff
+290
View File
@@ -0,0 +1,290 @@
package brotli
import (
"math"
"math/bits"
)
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Utilities for fast computation of logarithms. */
/* log2FloorNonZero returns floor(log2(n)), i.e. the index of the highest set
   bit of n. As the name says, n is expected to be non-zero; for n == 0 the
   result wraps around to the largest uint32. */
func log2FloorNonZero(n uint) uint32 {
	return uint32(bits.UintSize - 1 - bits.LeadingZeros(n))
}
/* A lookup table for small values of log2(int) to be used in entropy
computation. kLog2Table[i] holds log2(i) for i in 1..255 and kLog2Table[0]
is 0; fastLog2 reads it for arguments below len(kLog2Table).
The elements are float32, so each literal is rounded to float32 precision.
Generated with:
", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
var kLog2Table = []float32{
0.0000000000000000,
0.0000000000000000,
1.0000000000000000,
1.5849625007211563,
2.0000000000000000,
2.3219280948873622,
2.5849625007211561,
2.8073549220576042,
3.0000000000000000,
3.1699250014423126,
3.3219280948873626,
3.4594316186372978,
3.5849625007211565,
3.7004397181410922,
3.8073549220576037,
3.9068905956085187,
4.0000000000000000,
4.0874628412503400,
4.1699250014423122,
4.2479275134435852,
4.3219280948873626,
4.3923174227787607,
4.4594316186372973,
4.5235619560570131,
4.5849625007211570,
4.6438561897747244,
4.7004397181410926,
4.7548875021634691,
4.8073549220576037,
4.8579809951275728,
4.9068905956085187,
4.9541963103868758,
5.0000000000000000,
5.0443941193584534,
5.0874628412503400,
5.1292830169449664,
5.1699250014423122,
5.2094533656289501,
5.2479275134435852,
5.2854022188622487,
5.3219280948873626,
5.3575520046180838,
5.3923174227787607,
5.4262647547020979,
5.4594316186372973,
5.4918530963296748,
5.5235619560570131,
5.5545888516776376,
5.5849625007211570,
5.6147098441152083,
5.6438561897747244,
5.6724253419714961,
5.7004397181410926,
5.7279204545631996,
5.7548875021634691,
5.7813597135246599,
5.8073549220576046,
5.8328900141647422,
5.8579809951275719,
5.8826430493618416,
5.9068905956085187,
5.9307373375628867,
5.9541963103868758,
5.9772799234999168,
6.0000000000000000,
6.0223678130284544,
6.0443941193584534,
6.0660891904577721,
6.0874628412503400,
6.1085244567781700,
6.1292830169449672,
6.1497471195046822,
6.1699250014423122,
6.1898245588800176,
6.2094533656289510,
6.2288186904958804,
6.2479275134435861,
6.2667865406949019,
6.2854022188622487,
6.3037807481771031,
6.3219280948873617,
6.3398500028846252,
6.3575520046180847,
6.3750394313469254,
6.3923174227787598,
6.4093909361377026,
6.4262647547020979,
6.4429434958487288,
6.4594316186372982,
6.4757334309663976,
6.4918530963296748,
6.5077946401986964,
6.5235619560570131,
6.5391588111080319,
6.5545888516776376,
6.5698556083309478,
6.5849625007211561,
6.5999128421871278,
6.6147098441152092,
6.6293566200796095,
6.6438561897747253,
6.6582114827517955,
6.6724253419714952,
6.6865005271832185,
6.7004397181410917,
6.7142455176661224,
6.7279204545631988,
6.7414669864011465,
6.7548875021634691,
6.7681843247769260,
6.7813597135246599,
6.7944158663501062,
6.8073549220576037,
6.8201789624151887,
6.8328900141647422,
6.8454900509443757,
6.8579809951275719,
6.8703647195834048,
6.8826430493618416,
6.8948177633079437,
6.9068905956085187,
6.9188632372745955,
6.9307373375628867,
6.9425145053392399,
6.9541963103868758,
6.9657842846620879,
6.9772799234999168,
6.9886846867721664,
7.0000000000000000,
7.0112272554232540,
7.0223678130284544,
7.0334230015374501,
7.0443941193584534,
7.0552824355011898,
7.0660891904577721,
7.0768155970508317,
7.0874628412503400,
7.0980320829605272,
7.1085244567781700,
7.1189410727235076,
7.1292830169449664,
7.1395513523987937,
7.1497471195046822,
7.1598713367783891,
7.1699250014423130,
7.1799090900149345,
7.1898245588800176,
7.1996723448363644,
7.2094533656289492,
7.2191685204621621,
7.2288186904958804,
7.2384047393250794,
7.2479275134435861,
7.2573878426926521,
7.2667865406949019,
7.2761244052742384,
7.2854022188622487,
7.2946207488916270,
7.3037807481771031,
7.3128829552843557,
7.3219280948873617,
7.3309168781146177,
7.3398500028846243,
7.3487281542310781,
7.3575520046180847,
7.3663222142458151,
7.3750394313469254,
7.3837042924740528,
7.3923174227787607,
7.4008794362821844,
7.4093909361377026,
7.4178525148858991,
7.4262647547020979,
7.4346282276367255,
7.4429434958487288,
7.4512111118323299,
7.4594316186372973,
7.4676055500829976,
7.4757334309663976,
7.4838157772642564,
7.4918530963296748,
7.4998458870832057,
7.5077946401986964,
7.5156998382840436,
7.5235619560570131,
7.5313814605163119,
7.5391588111080319,
7.5468944598876373,
7.5545888516776376,
7.5622424242210728,
7.5698556083309478,
7.5774288280357487,
7.5849625007211561,
7.5924570372680806,
7.5999128421871278,
7.6073303137496113,
7.6147098441152075,
7.6220518194563764,
7.6293566200796095,
7.6366246205436488,
7.6438561897747244,
7.6510516911789290,
7.6582114827517955,
7.6653359171851765,
7.6724253419714952,
7.6794800995054464,
7.6865005271832185,
7.6934869574993252,
7.7004397181410926,
7.7073591320808825,
7.7142455176661224,
7.7210991887071856,
7.7279204545631996,
7.7347096202258392,
7.7414669864011465,
7.7481928495894596,
7.7548875021634691,
7.7615512324444795,
7.7681843247769260,
7.7747870596011737,
7.7813597135246608,
7.7879025593914317,
7.7944158663501062,
7.8008998999203047,
7.8073549220576037,
7.8137811912170374,
7.8201789624151887,
7.8265484872909159,
7.8328900141647422,
7.8392037880969445,
7.8454900509443757,
7.8517490414160571,
7.8579809951275719,
7.8641861446542798,
7.8703647195834048,
7.8765169465650002,
7.8826430493618425,
7.8887432488982601,
7.8948177633079446,
7.9008668079807496,
7.9068905956085187,
7.9128893362299619,
7.9188632372745955,
7.9248125036057813,
7.9307373375628867,
7.9366379390025719,
7.9425145053392399,
7.9483672315846778,
7.9541963103868758,
7.9600019320680806,
7.9657842846620870,
7.9715435539507720,
7.9772799234999168,
7.9829935746943104,
7.9886846867721664,
7.9943534368588578,
}
/* Faster logarithm for small integers, with the property of log2(0) == 0.
   Arguments covered by kLog2Table are answered from the table (at float32
   precision); everything else goes through math.Log2. */
func fastLog2(v uint) float64 {
	if v >= uint(len(kLog2Table)) {
		return math.Log2(float64(v))
	}
	return float64(kLog2Table[v])
}
+45
View File
@@ -0,0 +1,45 @@
package brotli
import (
"encoding/binary"
"math/bits"
"runtime"
)
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* findMatchLengthWithLimit returns the length of the longest common prefix of
   s1 and s2, looking at no more than limit bytes.

   Both slices must hold at least limit bytes; this is checked once up front
   (a shorter slice panics there). A limit of 0 returns 0 without touching
   the slices.

   On amd64 and 386 the comparison proceeds a machine word at a time and the
   first differing byte is located from the XOR of the two words; the
   remaining bytes, and all bytes on other architectures, are compared one by
   one. */
func findMatchLengthWithLimit(s1 []byte, s2 []byte, limit uint) uint {
	if limit == 0 {
		/* Nothing to compare. Without this guard, limit-1 below would wrap
		   around (limit is unsigned) and the bounds check would panic. */
		return 0
	}
	var matched uint = 0
	_, _ = s1[limit-1], s2[limit-1] // bounds check
	switch runtime.GOARCH {
	case "amd64":
		// Compare 8 bytes at a time.
		for matched+8 <= limit {
			w1 := binary.LittleEndian.Uint64(s1[matched:])
			w2 := binary.LittleEndian.Uint64(s2[matched:])
			if w1 != w2 {
				// The lowest set bit of the XOR marks the first differing byte.
				return matched + uint(bits.TrailingZeros64(w1^w2)>>3)
			}
			matched += 8
		}
	case "386":
		// Compare 4 bytes at a time.
		for matched+4 <= limit {
			w1 := binary.LittleEndian.Uint32(s1[matched:])
			w2 := binary.LittleEndian.Uint32(s2[matched:])
			if w1 != w2 {
				return matched + uint(bits.TrailingZeros32(w1^w2)>>3)
			}
			matched += 4
		}
	}
	// Tail (and the whole input on other architectures): byte by byte.
	for matched < limit && s1[matched] == s2[matched] {
		matched++
	}
	return matched
}
+287
View File
@@ -0,0 +1,287 @@
package brotli
import "encoding/binary"
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// HashTypeLength reports the hash type length of the h10 hasher, fixed at 4.
func (h *h10) HashTypeLength() uint {
	const hashTypeLength uint = 4
	return hashTypeLength
}
// StoreLookahead reports the store lookahead of the h10 hasher, fixed at 128.
func (h *h10) StoreLookahead() uint {
	const storeLookahead uint = 128
	return storeLookahead
}
// hashBytesH10 hashes the first 4 bytes of data (read little-endian) into a
// 17-bit key. data must hold at least 4 bytes.
func hashBytesH10(data []byte) uint32 {
	const keyBits = 17
	word := binary.LittleEndian.Uint32(data)
	/* The multiplication mixes best into the higher bits, so the key is
	   taken from the top keyBits bits of the product. */
	return (word * kHashMul32) >> (32 - keyBits)
}
/* A (forgetful) hash table where each hash bucket contains a binary tree of
sequences whose first 4 bytes share the same hash code.
Each sequence is 128 long and is identified by its starting
position in the input data. The binary tree is sorted by the lexicographic
order of the sequences, and it is also a max-heap with respect to the
starting positions. */
type h10 struct {
hasherCommon
/* (1 << lgwin) - 1, set by Initialize; masks a position down to its
   slot in forest. */
window_mask_ uint
/* One tree root position per 17-bit hash key (see hashBytesH10). */
buckets_ [1 << 17]uint32
/* Marker written into buckets_ and forest for "no node";
   Initialize sets it to 0 - window_mask_. */
invalid_pos_ uint32
/* Two entries per window slot: index 2*slot holds the left child and
   2*slot+1 the right child (see leftChildIndexH10/rightChildIndexH10). */
forest []uint32
}
// Initialize sizes the hasher for the window given by params.lgwin: it sets
// the window mask, derives the "invalid position" marker from it and
// allocates the forest with two child slots per window position.
func (h *h10) Initialize(params *encoderParams) {
	windowSize := uint(1) << params.lgwin
	h.window_mask_ = windowSize - 1
	h.invalid_pos_ = uint32(0 - h.window_mask_)
	h.forest = make([]uint32, 2*windowSize)
}
// Prepare resets every hash bucket to the invalid-position marker. The
// one_shot, input_size and data arguments are not used by this hasher.
func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) {
	empty := h.invalid_pos_
	for slot := range h.buckets_ {
		h.buckets_[slot] = empty
	}
}
// leftChildIndexH10 returns the forest index that stores the left child of
// the node at position pos.
func leftChildIndexH10(self *h10, pos uint) uint {
	slot := pos & self.window_mask_
	return 2 * slot
}
// rightChildIndexH10 returns the forest index that stores the right child of
// the node at position pos; it is the slot directly after the left child.
func rightChildIndexH10(self *h10, pos uint) uint {
	slot := pos & self.window_mask_
	return 2*slot + 1
}
/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
hash bucket's binary tree is searched for matches and is re-rooted at the
current position.
If less than 128 data is available, the hash bucket of the
current position is searched for matches, but the state of the hash table
is not changed, since we can not know the final sorting order of the
current (incomplete) sequence.
This function must be called with increasing cur_ix positions.

matches may be nil (as Store and StitchToPreviousBlock pass it); then no
match is recorded and best_len is never dereferenced. Otherwise every match
longer than *best_len is written to matches[0], *best_len is raised, and the
slice is advanced by one. The returned slice is the unused remainder of
matches. */
func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch {
var cur_ix_masked uint = cur_ix & ring_buffer_mask
/* Sequences are compared over at most 128 bytes. */
var max_comp_len uint = brotli_min_size_t(max_length, 128)
/* The tree is only modified when a full 128-byte sequence is available. */
var should_reroot_tree bool = (max_length >= 128)
var key uint32 = hashBytesH10(data[cur_ix_masked:])
var forest []uint32 = self.forest
/* Start the descent at the current root of the bucket's tree. */
var prev_ix uint = uint(self.buckets_[key])
/* The forest index of the rightmost node of the left subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
var node_left uint = leftChildIndexH10(self, cur_ix)
/* The forest index of the leftmost node of the right subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
var node_right uint = rightChildIndexH10(self, cur_ix)
/* The match length of the rightmost node of the left subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
var best_len_left uint = 0
/* The match length of the leftmost node of the right subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
var best_len_right uint = 0
var depth_remaining uint
if should_reroot_tree {
/* cur_ix becomes the new root of this bucket's tree. */
self.buckets_[key] = uint32(cur_ix)
}
/* Descend at most 64 levels. */
for depth_remaining = 64; ; depth_remaining-- {
var backward uint = cur_ix - prev_ix
var prev_ix_masked uint = prev_ix & ring_buffer_mask
/* Stop at a node that is the current position itself, lies further back
   than max_backward, or once the depth budget is used up; the open child
   slots are then terminated with the invalid marker. */
if backward == 0 || backward > max_backward || depth_remaining == 0 {
if should_reroot_tree {
forest[node_left] = self.invalid_pos_
forest[node_right] = self.invalid_pos_
}
break
}
{
/* The first min(best_len_left, best_len_right) bytes are taken as already
   matching, so the comparison resumes after them. */
var cur_len uint = brotli_min_size_t(best_len_left, best_len_right)
var len uint
assert(cur_len <= 128)
len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len)
if matches != nil && len > *best_len {
*best_len = uint(len)
initBackwardMatch(&matches[0], backward, uint(len))
matches = matches[1:]
}
/* The candidate matches over the whole compared length: the new root
   takes over both of its children and the descent ends. */
if len >= max_comp_len {
if should_reroot_tree {
forest[node_left] = forest[leftChildIndexH10(self, prev_ix)]
forest[node_right] = forest[rightChildIndexH10(self, prev_ix)]
}
break
}
/* The first differing byte decides the side: a candidate that sorts
   before the current sequence goes to the left subtree of the new root
   and the descent continues in its right child; otherwise the mirror
   case applies. */
if data[cur_ix_masked+len] > data[prev_ix_masked+len] {
best_len_left = uint(len)
if should_reroot_tree {
forest[node_left] = uint32(prev_ix)
}
node_left = rightChildIndexH10(self, prev_ix)
prev_ix = uint(forest[node_left])
} else {
best_len_right = uint(len)
if should_reroot_tree {
forest[node_right] = uint32(prev_ix)
}
node_right = leftChildIndexH10(self, prev_ix)
prev_ix = uint(forest[node_right])
}
}
}
return matches
}
/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
length of max_length and stores the position cur_ix in the hash table.
Returns the number of matches found, and stores the found
matches in matches[0] to matches[count - 1]. The matches will be
sorted by strictly increasing length and (non-strictly) increasing
distance.

Three sources are consulted in order: a linear scan of the most recent
positions, the hash bucket's binary tree (storeAndFindMatchesH10), and the
static dictionary. matches must have room for every match that is written;
there is no capacity check here. */
func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint {
var orig_matches []backwardMatch = matches
var cur_ix_masked uint = cur_ix & ring_buffer_mask
var best_len uint = 1
/* How far back the linear scan looks: 16 positions, or 64 at
   hqZopflificationQuality. */
var short_match_max_backward uint
if params.quality != hqZopflificationQuality {
short_match_max_backward = 16
} else {
short_match_max_backward = 64
}
var stop uint = cur_ix - short_match_max_backward
var dict_matches [maxStaticDictionaryMatchLen + 1]uint32
var i uint
/* Clamp instead of letting the unsigned subtraction above wrap around. */
if cur_ix < short_match_max_backward {
stop = 0
}
/* Linear scan over the nearest positions; it ends as soon as a match
   longer than 2 bytes has been found. */
for i = cur_ix - 1; i > stop && best_len <= 2; i-- {
var prev_ix uint = i
var backward uint = cur_ix - prev_ix
if backward > max_backward {
break
}
prev_ix &= ring_buffer_mask
/* Cheap reject: the first two bytes must agree. */
if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len > best_len {
best_len = uint(len)
initBackwardMatch(&matches[0], backward, uint(len))
matches = matches[1:]
}
}
}
/* Tree search; skipped when the scan already matched max_length bytes. */
if best_len < max_length {
matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches)
}
for i = 0; i <= maxStaticDictionaryMatchLen; i++ {
dict_matches[i] = kInvalidMatch
}
{
/* Only dictionary matches longer than the best match so far, and at
   least 4 bytes long, are considered. */
var minlen uint = brotli_max_size_t(4, best_len+1)
if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) {
var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length)
var l uint
for l = minlen; l <= maxlen; l++ {
var dict_id uint32 = dict_matches[l]
if dict_id < kInvalidMatch {
/* dict_id carries a length code in its low 5 bits; the remaining
   bits are added to max_backward + gap + 1 to form the distance. */
var distance uint = max_backward + gap + uint(dict_id>>5) + 1
if distance <= params.dist.max_distance {
initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31))
matches = matches[1:]
}
}
}
}
}
/* matches was advanced by one element per stored match, so the drop in
   capacity equals the number of matches written. */
return uint(-cap(matches) + cap(orig_matches))
}
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
   current sequence, without returning any matches.
   REQUIRES: ix + 128 <= end-of-current-block */
func (h *h10) Store(data []byte, mask uint, ix uint) {
	/* Maximum distance is window size - 16, see section 9.1. of the spec. */
	maxBackward := h.window_mask_ - windowGap + 1
	// Passing nil for best_len and matches stores the position without
	// recording any match.
	storeAndFindMatchesH10(h, data, ix, mask, 128, maxBackward, nil, nil)
}
// StoreRange stores the positions in [ix_start, ix_end). When the range is
// at least 63 long, only its last 63 positions are stored one by one; the
// part before them is stored at every 8th position, and only if that part
// is at least 512 long.
func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	denseFrom := ix_start
	if ix_start+63 <= ix_end {
		denseFrom = ix_end - 63
	}

	if ix_start+512 <= denseFrom {
		for sparse := ix_start; sparse < denseFrom; sparse += 8 {
			h.Store(data, mask, sparse)
		}
	}

	for dense := denseFrom; dense < ix_end; dense++ {
		h.Store(data, mask, dense)
	}
}
// StitchToPreviousBlock stores positions from the end of the previous block
// once the new block supplies the bytes they need. Nothing happens unless
// num_bytes >= HashTypeLength()-1 and position >= 128.
func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 128 {
		return
	}

	/* Store the last `128 - 1` positions in the hasher.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	first := position - 128 + 1
	last := brotli_min_size_t(position, first+num_bytes)
	for ix := first; ix < last; ix++ {
		/* Maximum distance is window size - 16, see section 9.1. of the spec.
		   Furthermore, we have to make sure that we don't look further back
		   from the start of the next block than the window size, otherwise we
		   could access already overwritten areas of the ring-buffer. */
		maxBackward := h.window_mask_ - brotli_max_size_t(windowGap-1, position-ix)

		/* We know that ix + 128 <= position + num_bytes, i.e. the
		   end of the current block and that we have at least
		   128 tail in the ring-buffer. */
		storeAndFindMatchesH10(h, ringbuffer, ix, ringbuffer_mask, 128, maxBackward, nil, nil)
	}
}
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH
   Upper bound on the matches one position can yield: findAllMatchesH10 scans
   at most 64 nearby positions and storeAndFindMatchesH10 descends at most 64
   tree levels.
   NOTE(review): presumably used by callers to size the matches buffer;
   confirm at the call sites. */
const maxNumMatchesH10 = 128
// FindLongestMatch is present so that *h10 has the full hasherHandle method
// set; h10 does not implement it and calling it panics.
func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
panic("unimplemented")
}
// PrepareDistanceCache is present so that *h10 has the full hasherHandle
// method set; h10 does not implement it and calling it panics.
func (*h10) PrepareDistanceCache(distance_cache []int) {
panic("unimplemented")
}
+214
View File
@@ -0,0 +1,214 @@
package brotli
import "encoding/binary"
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (bucket_size_) to a ring buffer of
fixed size (block_size_). The ring buffer contains the last block_size_
index positions of the given hash key in the compressed data. */
// HashTypeLength reports the hash type length of the h5 hasher, fixed at 4.
func (h *h5) HashTypeLength() uint {
	const hashTypeLength uint = 4
	return hashTypeLength
}
// StoreLookahead reports the store lookahead of the h5 hasher, fixed at 4.
func (h *h5) StoreLookahead() uint {
	const storeLookahead uint = 4
	return storeLookahead
}
/* HashBytes is the function that chooses the bucket to place the address in.
   It multiplies the first 4 bytes of data (read little-endian) by kHashMul32
   and keeps the bits that remain after shifting right by shift. */
func hashBytesH5(data []byte, shift int) uint32 {
	mixed := binary.LittleEndian.Uint32(data) * kHashMul32
	/* The multiplication mixes best into the higher bits, so the result is
	   taken from there. */
	return mixed >> uint(shift)
}
/* h5 state. Every hash key owns a block of block_size_ slots in buckets,
   used as a ring buffer of the most recent positions stored for that key. */
type h5 struct {
hasherCommon
/* Number of hash keys: 1 << params.bucket_bits. */
bucket_size_ uint
/* Slots per key: 1 << params.block_bits. */
block_size_ uint
/* Right shift applied to the 32-bit hash product: 32 - params.bucket_bits. */
hash_shift_ int
/* block_size_ - 1; wraps a per-key counter to a slot inside its block. */
block_mask_ uint32
/* Per key: how many positions have been stored (a wrapping uint16 counter). */
num []uint16
/* bucket_size_ * block_size_ stored positions, grouped by key. */
buckets []uint32
}
// Initialize derives the table geometry from the hasher parameters stored in
// the embedded hasherCommon (h.params) and allocates the counter and bucket
// arrays. The params argument is not read here; h.params must be set first.
func (h *h5) Initialize(params *encoderParams) {
	bucketBits := h.params.bucket_bits
	blockBits := h.params.block_bits

	h.hash_shift_ = 32 - bucketBits
	h.bucket_size_ = uint(1) << uint(bucketBits)
	h.block_size_ = uint(1) << uint(blockBits)
	h.block_mask_ = uint32(h.block_size_ - 1)

	h.num = make([]uint16, h.bucket_size_)
	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
}
// Prepare resets the per-key counters. For a one-shot input no larger than
// bucket_size_/64 only the counters of keys that occur in the input are
// cleared; otherwise every counter is cleared.
func (h *h5) Prepare(one_shot bool, input_size uint, data []byte) {
	counts := h.num
	partialThreshold := h.bucket_size_ >> 6

	if !one_shot || input_size > partialThreshold {
		total := int(h.bucket_size_)
		for k := 0; k < total; k++ {
			counts[k] = 0
		}
		return
	}

	/* Partial preparation is 100 times slower (per socket). */
	for pos := uint(0); pos < input_size; pos++ {
		counts[hashBytesH5(data[pos:], h.hash_shift_)] = 0
	}
}
/* Look at 4 bytes at &data[ix & mask].
   Compute a hash from these, and store the value of ix in the next ring
   slot of that hash key's block. */
func (h *h5) Store(data []byte, mask uint, ix uint) {
	key := hashBytesH5(data[ix&mask:], h.hash_shift_)
	slot := uint(h.num[key]) & uint(h.block_mask_)
	blockStart := uint(key << uint(h.params.block_bits))
	h.buckets[slot+blockStart] = uint32(ix)
	h.num[key]++
}
// StoreRange stores every position in the half-open range [ix_start, ix_end).
func (h *h5) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for ix := ix_start; ix < ix_end; ix++ {
		h.Store(data, mask, ix)
	}
}
// StitchToPreviousBlock stores the three positions before position, provided
// num_bytes >= HashTypeLength()-1 and position >= 3.
func (h *h5) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}

	/* Prepare the hashes for three last bytes of the last write.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	for back := uint(3); back >= 1; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
// PrepareDistanceCache fills the derived entries of distance_cache for as
// many distances as this hasher is configured to check.
func (h *h5) PrepareDistanceCache(distance_cache []int) {
	toCheck := h.params.num_last_distances_to_check
	prepareDistanceCache(distance_cache, toCheck)
}
/* Find a longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
REQUIRES: PrepareDistanceCache must be invoked for current distance cache
values; if this method is invoked repeatedly with the same distance
cache values, it is enough to invoke PrepareDistanceCache once.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
func (h *h5) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
var num []uint16 = h.num
var buckets []uint32 = h.buckets
var cur_ix_masked uint = cur_ix & ring_buffer_mask
/* The incoming score; compared with out.score at the end to tell whether
   any candidate improved on it. */
var min_score uint = out.score
var best_score uint = out.score
var best_len uint = out.len
var i uint
var bucket []uint32
/* Don't accept a short copy from far away. */
out.len = 0
out.len_code_delta = 0
/* Try last distance first. */
for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
var backward uint = uint(distance_cache[i])
var prev_ix uint = uint(cur_ix - backward)
/* Rejects backward == 0 and any backward larger than cur_ix (the unsigned
   subtraction wraps), which includes negative cache entries. */
if prev_ix >= cur_ix {
continue
}
if backward > max_backward {
continue
}
prev_ix &= ring_buffer_mask
/* Cheap reject: a candidate can only beat the current best if it also
   matches at offset best_len, and that byte must lie inside the ring
   buffer for both positions. */
if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 3 || (len == 2 && i < 2) {
/* Comparing for >= 2 does not change the semantics, but just saves for
a few unnecessary binary logarithms in backward reference score,
since we are not interested in such short matches. */
var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
if best_score < score {
/* Every cache entry except the first pays a penalty. */
if i != 0 {
score -= backwardReferencePenaltyUsingLastDistance(i)
}
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
}
{
var key uint32 = hashBytesH5(data[cur_ix_masked:], h.hash_shift_)
/* bucket starts at this key's block of block_size_ slots. */
bucket = buckets[key<<uint(h.params.block_bits):]
/* Visit at most the last block_size_ stored entries, newest first. */
var down uint
if uint(num[key]) > h.block_size_ {
down = uint(num[key]) - h.block_size_
} else {
down = 0
}
for i = uint(num[key]); i > down; {
var prev_ix uint
i--
prev_ix = uint(bucket[uint32(i)&h.block_mask_])
var backward uint = cur_ix - prev_ix
/* The walk ends at the first entry further away than max_backward. */
if backward > max_backward {
break
}
prev_ix &= ring_buffer_mask
if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
/* Comparing for >= 3 does not change the semantics, but just saves
for a few unnecessary binary logarithms in backward reference
score, since we are not interested in such short matches. */
var score uint = backwardReferenceScore(uint(len), backward)
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
/* Record cur_ix in the key's ring and advance its counter. */
bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix)
num[key]++
}
/* Only when no candidate raised the score is the static dictionary tried. */
if min_score == out.score {
searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
}
}
+216
View File
@@ -0,0 +1,216 @@
package brotli
import "encoding/binary"
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (bucket_size_) to a ring buffer of
fixed size (block_size_). The ring buffer contains the last block_size_
index positions of the given hash key in the compressed data. */
// HashTypeLength reports the hash type length of the h6 hasher, fixed at 8.
func (h *h6) HashTypeLength() uint {
	const hashTypeLength uint = 8
	return hashTypeLength
}
// StoreLookahead reports the store lookahead of the h6 hasher, fixed at 8.
func (h *h6) StoreLookahead() uint {
	const storeLookahead uint = 8
	return storeLookahead
}
/* HashBytes is the function that chooses the bucket to place the address in.
   It reads 8 bytes of data little-endian, keeps the bits selected by mask,
   multiplies by kHashMul64Long and shifts the product right by shift. */
func hashBytesH6(data []byte, mask uint64, shift int) uint32 {
	masked := binary.LittleEndian.Uint64(data) & mask
	product := masked * kHashMul64Long
	/* The multiplication mixes best into the higher bits, so the result is
	   taken from there. */
	return uint32(product >> uint(shift))
}
/* h6 state. Every hash key owns a block of block_size_ slots in buckets,
   used as a ring buffer of the most recent positions stored for that key. */
type h6 struct {
hasherCommon
/* Number of hash keys: 1 << params.bucket_bits. */
bucket_size_ uint
/* Slots per key: 1 << params.block_bits. */
block_size_ uint
/* Right shift applied to the 64-bit hash product: 64 - params.bucket_bits. */
hash_shift_ int
/* Keeps the low 8*params.hash_len bits of the 8 bytes read for hashing. */
hash_mask_ uint64
/* block_size_ - 1; wraps a per-key counter to a slot inside its block. */
block_mask_ uint32
/* Per key: how many positions have been stored (a wrapping uint16 counter). */
num []uint16
/* bucket_size_ * block_size_ stored positions, grouped by key. */
buckets []uint32
}
// Initialize derives the table geometry and the hash mask from the hasher
// parameters stored in the embedded hasherCommon (h.params) and allocates
// the counter and bucket arrays. The params argument is not read here;
// h.params must be set first.
func (h *h6) Initialize(params *encoderParams) {
	bucketBits := h.params.bucket_bits
	blockBits := h.params.block_bits

	h.hash_shift_ = 64 - bucketBits
	h.hash_mask_ = (^(uint64(0))) >> uint(64-8*h.params.hash_len)
	h.bucket_size_ = uint(1) << uint(bucketBits)
	h.block_size_ = uint(1) << uint(blockBits)
	h.block_mask_ = uint32(h.block_size_ - 1)

	h.num = make([]uint16, h.bucket_size_)
	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
}
// Prepare resets the per-key counters. For a one-shot input no larger than
// bucket_size_/64 only the counters of keys that occur in the input are
// cleared; otherwise every counter is cleared.
func (h *h6) Prepare(one_shot bool, input_size uint, data []byte) {
	counts := h.num
	partialThreshold := h.bucket_size_ >> 6

	if !one_shot || input_size > partialThreshold {
		total := int(h.bucket_size_)
		for k := 0; k < total; k++ {
			counts[k] = 0
		}
		return
	}

	/* Partial preparation is 100 times slower (per socket). */
	for pos := uint(0); pos < input_size; pos++ {
		counts[hashBytesH6(data[pos:], h.hash_mask_, h.hash_shift_)] = 0
	}
}
/* Hash the 8 bytes at &data[ix & mask] (reduced by hash_mask_) and store the
   value of ix in the next ring slot of that hash key's block. */
func (h *h6) Store(data []byte, mask uint, ix uint) {
	key := hashBytesH6(data[ix&mask:], h.hash_mask_, h.hash_shift_)
	slot := uint(h.num[key]) & uint(h.block_mask_)
	blockStart := uint(key << uint(h.params.block_bits))
	h.buckets[slot+blockStart] = uint32(ix)
	h.num[key]++
}
// StoreRange stores every position in the half-open range [ix_start, ix_end).
func (h *h6) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for ix := ix_start; ix < ix_end; ix++ {
		h.Store(data, mask, ix)
	}
}
// StitchToPreviousBlock stores the three positions before position, provided
// num_bytes >= HashTypeLength()-1 and position >= 3.
func (h *h6) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}

	/* Prepare the hashes for three last bytes of the last write.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	for back := uint(3); back >= 1; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
// PrepareDistanceCache fills the derived entries of distance_cache for as
// many distances as this hasher is configured to check.
func (h *h6) PrepareDistanceCache(distance_cache []int) {
	toCheck := h.params.num_last_distances_to_check
	prepareDistanceCache(distance_cache, toCheck)
}
/* Find a longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
REQUIRES: PrepareDistanceCache must be invoked for current distance cache
values; if this method is invoked repeatedly with the same distance
cache values, it is enough to invoke PrepareDistanceCache once.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
func (h *h6) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
var num []uint16 = h.num
var buckets []uint32 = h.buckets
var cur_ix_masked uint = cur_ix & ring_buffer_mask
/* The incoming score; compared with out.score at the end to tell whether
   any candidate improved on it. */
var min_score uint = out.score
var best_score uint = out.score
var best_len uint = out.len
var i uint
var bucket []uint32
/* Don't accept a short copy from far away. */
out.len = 0
out.len_code_delta = 0
/* Try last distance first. */
for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
var backward uint = uint(distance_cache[i])
var prev_ix uint = uint(cur_ix - backward)
/* Rejects backward == 0 and any backward larger than cur_ix (the unsigned
   subtraction wraps), which includes negative cache entries. */
if prev_ix >= cur_ix {
continue
}
if backward > max_backward {
continue
}
prev_ix &= ring_buffer_mask
/* Cheap reject: a candidate can only beat the current best if it also
   matches at offset best_len, and that byte must lie inside the ring
   buffer for both positions. */
if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 3 || (len == 2 && i < 2) {
/* Comparing for >= 2 does not change the semantics, but just saves for
a few unnecessary binary logarithms in backward reference score,
since we are not interested in such short matches. */
var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
if best_score < score {
/* Every cache entry except the first pays a penalty. */
if i != 0 {
score -= backwardReferencePenaltyUsingLastDistance(i)
}
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
}
{
var key uint32 = hashBytesH6(data[cur_ix_masked:], h.hash_mask_, h.hash_shift_)
/* bucket starts at this key's block of block_size_ slots. */
bucket = buckets[key<<uint(h.params.block_bits):]
/* Visit at most the last block_size_ stored entries, newest first. */
var down uint
if uint(num[key]) > h.block_size_ {
down = uint(num[key]) - h.block_size_
} else {
down = 0
}
for i = uint(num[key]); i > down; {
var prev_ix uint
i--
prev_ix = uint(bucket[uint32(i)&h.block_mask_])
var backward uint = cur_ix - prev_ix
/* The walk ends at the first entry further away than max_backward. */
if backward > max_backward {
break
}
prev_ix &= ring_buffer_mask
if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
/* Comparing for >= 3 does not change the semantics, but just saves
for a few unnecessary binary logarithms in backward reference
score, since we are not interested in such short matches. */
var score uint = backwardReferenceScore(uint(len), backward)
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
/* Record cur_ix in the key's ring and advance its counter. */
bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix)
num[key]++
}
/* Only when no candidate raised the score is the static dictionary tried. */
if min_score == out.score {
searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
}
}
+342
View File
@@ -0,0 +1,342 @@
package brotli
import (
"encoding/binary"
"fmt"
)
/* hasherCommon is the state shared by every hasher; concrete hashers embed
   it and expose it through Common(). */
type hasherCommon struct {
/* Hasher parameters; hasherSetup copies them from encoderParams.hasher
   before calling Initialize. */
params hasherParams
/* Set by hasherSetup once Prepare has run; cleared by hasherReset. */
is_prepared_ bool
/* Static-dictionary statistics maintained by searchInStaticDictionary:
   entries probed and entries that produced a match. */
dict_num_lookups uint
dict_num_matches uint
}
// Common returns the receiver itself, so that any type embedding
// hasherCommon provides the Common method required by hasherHandle.
func (h *hasherCommon) Common() *hasherCommon {
return h
}
/* hasherHandle is the interface implemented by every hasher that newHasher
   can create. */
type hasherHandle interface {
/* Shared state (parameters, prepared flag, dictionary statistics). */
Common() *hasherCommon
/* One-time setup; hasherSetup calls it right after creating the hasher. */
Initialize(params *encoderParams)
/* Resets the hasher's tables; hasherSetup calls it whenever the hasher is
   not marked as prepared. */
Prepare(one_shot bool, input_size uint, data []byte)
/* Stores positions near the end of the previous block once the next
   block's bytes are available. */
StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
HashTypeLength() uint
StoreLookahead() uint
PrepareDistanceCache(distance_cache []int)
/* Searches for the best backward match at cur_ix and reports it in out. */
FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
/* Store positions ix_start..ix_end-1, or the single position ix. */
StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
Store(data []byte, mask uint, ix uint)
}
/* Number of cutoff transforms, i.e. of 6-bit entries packed into
   kCutoffTransforms. */
const kCutoffTransformsCount uint32 = 10
/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
/* The first line lists the transform ids, the second splits each one into
   4*cut plus a 6-bit value. The constant packs those 6-bit values, with the
   entry for a given cut at bit offset 6*cut.
   NOTE(review): testStaticDictionaryItem decodes dictionary.cutoffTransforms
   this way; presumably that field is initialised from this constant. */
const kCutoffTransforms uint64 = 0x071B520ADA2D3200
/* hasherSearchResult is the in/out record of a match search: callers pass
   the score to beat, and FindLongestMatch / testStaticDictionaryItem
   overwrite the fields when they find a better match. */
type hasherSearchResult struct {
/* Length of the match. */
len uint
/* Backward distance of the match. */
distance uint
/* Score of the match. */
score uint
/* For static-dictionary matches: dictionary word length minus the matched
   length (see testStaticDictionaryItem). FindLongestMatch in h5/h6 resets
   it to 0. */
len_code_delta int
}
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
const kHashMul32 uint32 = 0x1E35A7BD
/* kHashMul32 repeated in both 32-bit halves. */
const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
/* 64-bit multiplier used by hashBytesH6. */
const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
// hash14 hashes the first 4 bytes of data (read little-endian) into a 14-bit
// value. data must hold at least 4 bytes.
func hash14(data []byte) uint32 {
	const hashBits = 14
	word := binary.LittleEndian.Uint32(data)
	/* The multiplication mixes best into the higher bits, so the hash is
	   taken from the top hashBits bits of the product. */
	return (word * kHashMul32) >> (32 - hashBits)
}
// prepareDistanceCache fills the derived entries of distance_cache.
//
// When num_distances > 4, entries 4..9 become the last distance
// (distance_cache[0]) offset by -1, +1, -2, +2, -3, +3. When
// num_distances > 10, entries 10..15 receive the same offsets applied to
// distance_cache[1]. The slice must be long enough for the entries written.
func prepareDistanceCache(distance_cache []int, num_distances int) {
	if num_distances <= 4 {
		return
	}

	// spread writes base-1, base+1, base-2, base+2, base-3, base+3 into the
	// six consecutive entries starting at first.
	spread := func(base int, first int) {
		for delta := 1; delta <= 3; delta++ {
			slot := first + 2*(delta-1)
			distance_cache[slot] = base - delta
			distance_cache[slot+1] = base + delta
		}
	}

	spread(distance_cache[0], 4)
	if num_distances > 10 {
		spread(distance_cache[1], 10)
	}
}
/* Score credited per byte of match length. */
const literalByteScore = 135
/* Score deducted per bit of log2(distance) in backwardReferenceScore. */
const distanceBitPenalty = 30
/* Score must be positive after applying maximal penalty. */
const scoreBase = (distanceBitPenalty * 8 * 8)
/* Usually, we always choose the longest backward reference. This function
allows for the exception of that rule.
If we choose a backward reference that is further away, it will
usually be coded with more bits. We approximate this by assuming
log2(distance). If the distance can be expressed in terms of the
last four distances, we use some heuristic constants to estimate
the bits cost. For the first up to four literals we use the bit
cost of the literals from the literal cost model, after that we
use the average bit cost of the cost model.
This function is used to sometimes discard a longer backward reference
when it is not much longer and the bit cost for encoding it is more
than the saved literals.
backward_reference_offset MUST be positive. */
func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
}
// backwardReferenceScoreUsingLastDistance scores a match of copy_length
// bytes whose distance comes from the distance cache: the length gain plus
// scoreBase plus a fixed bonus of 15, with no distance penalty.
func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
	const lastDistanceBonus = 15
	gain := literalByteScore * uint(copy_length)
	return gain + scoreBase + lastDistanceBonus
}
// backwardReferencePenaltyUsingLastDistance returns the score penalty for
// using the distance-cache entry distance_short_code: 39 plus an even extra
// amount read from a packed table. The lowest bit of the code is ignored,
// so codes 2k and 2k+1 share a penalty.
func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
	const packedExtras uint = 0x1CA10
	shift := distance_short_code & 0xE
	extra := (packedExtras >> shift) & 0xE
	return uint(39) + extra
}
// testStaticDictionaryItem checks whether the dictionary word encoded in
// item (low 5 bits: word length, remaining bits: word index) is a usable
// match for data.
//
// The word is rejected when it is longer than max_length, when nothing
// matches or too much of it would have to be cut off, when the resulting
// distance exceeds max_distance, or when its score is below out.score.
// On success out is overwritten with the match and true is returned.
func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
	wordLen := item & 0x1F
	wordIdx := item >> 5
	wordStart := uint(dictionary.words.offsets_by_length[wordLen]) + wordLen*wordIdx
	if wordLen > max_length {
		return false
	}

	matched := findMatchLengthWithLimit(data, dictionary.words.data[wordStart:], uint(wordLen))
	if matched+uint(dictionary.cutoffTransformsCount) <= wordLen || matched == 0 {
		return false
	}

	// The unmatched tail selects a cutoff transform, which in turn is part
	// of the distance that encodes the dictionary reference.
	cut := wordLen - matched
	transformID := (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
	distance := max_backward + 1 + wordIdx + (transformID << dictionary.words.size_bits_by_length[wordLen])
	if distance > max_distance {
		return false
	}

	newScore := backwardReferenceScore(matched, distance)
	if newScore < out.score {
		return false
	}

	out.len = matched
	out.len_code_delta = int(wordLen) - int(matched)
	out.distance = distance
	out.score = newScore
	return true
}
// searchInStaticDictionary probes the static dictionary hash table for a
// word matching data and lets testStaticDictionaryItem update out.
//
// The search is skipped when fewer than 1 in 128 earlier lookups produced a
// match. Otherwise it probes one table entry when shallow is set and two
// consecutive entries when it is not, updating the lookup/match statistics
// kept in the hasher's common state.
func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
	stats := handle.Common()
	if stats.dict_num_matches < stats.dict_num_lookups>>7 {
		return
	}

	probes := uint(2)
	if shallow {
		probes = 1
	}

	firstSlot := uint(hash14(data) << 1)
	for probe := uint(0); probe < probes; probe++ {
		item := uint(dictionary.hash_table[firstSlot+probe])
		stats.dict_num_lookups++
		if item != 0 && testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out) {
			stats.dict_num_matches++
		}
	}
}
/* backwardMatch is one match candidate in compact form. */
type backwardMatch struct {
/* Backward distance of the match. */
distance uint32
/* Match length in the bits above the low 5; the low 5 bits hold a length
   code, or 0 when the length code equals the length (see
   initDictionaryBackwardMatch and backwardMatchLengthCode). */
length_and_code uint32
}
// initBackwardMatch overwrites *self with a match of length len at distance
// dist; the length-code bits are left at 0.
func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
	*self = backwardMatch{
		distance:        uint32(dist),
		length_and_code: uint32(len << 5),
	}
}
// initDictionaryBackwardMatch overwrites *self with a match of length len at
// distance dist whose length code is len_code. A length code equal to the
// length is stored as 0.
func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
	storedCode := len_code
	if len == len_code {
		storedCode = 0
	}
	*self = backwardMatch{
		distance:        uint32(dist),
		length_and_code: uint32(len<<5 | storedCode),
	}
}
// backwardMatchLength extracts the match length, stored above the 5
// length-code bits of length_and_code.
func backwardMatchLength(self *backwardMatch) uint {
	const codeBits = 5
	packed := self.length_and_code
	return uint(packed >> codeBits)
}
// backwardMatchLengthCode returns the length code of the match: the low 5
// bits of length_and_code when they are non-zero, otherwise the match
// length itself.
func backwardMatchLengthCode(self *backwardMatch) uint {
	if code := uint(self.length_and_code) & 31; code != 0 {
		return code
	}
	return backwardMatchLength(self)
}
// hasherReset marks the hasher as not prepared, so that the next
// hasherSetup call runs Prepare again. A nil handle is ignored.
func hasherReset(handle hasherHandle) {
	if handle != nil {
		handle.Common().is_prepared_ = false
	}
}
// newHasher creates the hasher for the numeric hasher type typ.
//
// Types 2, 3, 4 and 54 are hashLongestMatchQuickly configurations; 5, 6 and
// 10 create h5, h6 and h10; 40, 41 and 42 are hashForgetfulChain
// configurations; 35, 55 and 65 are composites that pair the hasher of type
// 3, 54 and 6 respectively with a hashRolling. Any other value panics.
//
// The returned hasher has not had Initialize called yet; hasherSetup does
// that after creating it.
func newHasher(typ int) hasherHandle {
switch typ {
case 2:
return &hashLongestMatchQuickly{
bucketBits: 16,
bucketSweep: 1,
hashLen: 5,
useDictionary: true,
}
case 3:
return &hashLongestMatchQuickly{
bucketBits: 16,
bucketSweep: 2,
hashLen: 5,
useDictionary: false,
}
case 4:
return &hashLongestMatchQuickly{
bucketBits: 17,
bucketSweep: 4,
hashLen: 5,
useDictionary: true,
}
case 5:
return new(h5)
case 6:
return new(h6)
case 10:
return new(h10)
case 35:
return &hashComposite{
ha: newHasher(3),
hb: &hashRolling{jump: 4},
}
case 40:
return &hashForgetfulChain{
bucketBits: 15,
numBanks: 1,
bankBits: 16,
numLastDistancesToCheck: 4,
}
case 41:
return &hashForgetfulChain{
bucketBits: 15,
numBanks: 1,
bankBits: 16,
numLastDistancesToCheck: 10,
}
case 42:
return &hashForgetfulChain{
bucketBits: 15,
numBanks: 512,
bankBits: 9,
numLastDistancesToCheck: 16,
}
case 54:
return &hashLongestMatchQuickly{
bucketBits: 20,
bucketSweep: 4,
hashLen: 7,
useDictionary: false,
}
case 55:
return &hashComposite{
ha: newHasher(54),
hb: &hashRolling{jump: 4},
}
case 65:
return &hashComposite{
ha: newHasher(6),
hb: &hashRolling{jump: 1},
}
}
panic(fmt.Sprintf("unknown hasher type: %d", typ))
}
// hasherSetup makes sure *handle refers to an initialised and prepared
// hasher.
//
// If *handle is nil, a hasher is chosen via chooseHasher, created, given the
// hasher parameters and initialised. If the hasher is not marked as
// prepared, Prepare is run (as one-shot when position == 0 and is_last), the
// dictionary statistics are reset when position == 0, and the hasher is
// marked as prepared.
func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
	if *handle == nil {
		chooseHasher(params, &params.hasher)
		created := newHasher(params.hasher.type_)
		*handle = created
		created.Common().params = params.hasher
		created.Initialize(params)
	}

	hasher := *handle
	state := hasher.Common()
	if state.is_prepared_ {
		return
	}

	oneShot := position == 0 && is_last
	hasher.Prepare(oneShot, input_size, data)
	if position == 0 {
		state.dict_num_lookups = 0
		state.dict_num_matches = 0
	}
	state.is_prepared_ = true
}
// initOrStitchToPreviousBlock sets up the hasher behind handle (creating and
// preparing it if needed) and then lets it stitch the new block of
// input_size bytes at position onto the previously hashed data.
func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
	hasherSetup(handle, params, data, position, input_size, is_last)
	(*handle).StitchToPreviousBlock(input_size, position, data, mask)
}
+93
View File
@@ -0,0 +1,93 @@
package brotli
/* Copyright 2018 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* HashTypeLength returns the larger of the two wrapped hashers' values. */
func (h *hashComposite) HashTypeLength() uint {
	longest := h.ha.HashTypeLength()
	if other := h.hb.HashTypeLength(); other >= longest {
		longest = other
	}
	return longest
}
/* StoreLookahead returns the larger of the two wrapped hashers' values. */
func (h *hashComposite) StoreLookahead() uint {
	furthest := h.ha.StoreLookahead()
	if other := h.hb.StoreLookahead(); other >= furthest {
		furthest = other
	}
	return furthest
}
/* Composite hasher: combines two other hashers (HASHER_A and HASHER_B in the
C original). Every method of the composite forwards to ha first and then to
hb. */
type hashComposite struct {
hasherCommon
/* First wrapped hasher. */
ha hasherHandle
/* Second wrapped hasher. */
hb hasherHandle
/* Encoder params remembered by Initialize. */
params *encoderParams
}
/* Initialize remembers |params| and initializes both wrapped hashers.

   The C original deferred sub-hasher initialization to the first Prepare
   call, guarded by a null check on the first sub-hasher. In this port the
   guard read `h.ha == nil` and then immediately called h.ha.Common(): it
   either dereferenced a nil interface or, when ha/hb were supplied at
   construction, never ran at all, leaving the sub-hashers without their
   Initialize call. The setup therefore happens here, in the same order as
   before (ha completely, then hb). */
func (h *hashComposite) Initialize(params *encoderParams) {
	h.params = params
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		common := sub.Common()
		common.params = params.hasher
		common.is_prepared_ = false
		common.dict_num_lookups = 0
		common.dict_num_matches = 0
		sub.Initialize(params)
	}
}

/* Prepare forwards to both wrapped hashers, ha first and then hb. */
func (h *hashComposite) Prepare(one_shot bool, input_size uint, data []byte) {
	h.ha.Prepare(one_shot, input_size, data)
	h.hb.Prepare(one_shot, input_size, data)
}
/* Store forwards the position to ha and then to hb. */
func (h *hashComposite) Store(data []byte, mask uint, ix uint) {
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		sub.Store(data, mask, ix)
	}
}
/* StoreRange forwards the range to ha and then to hb. */
func (h *hashComposite) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		sub.StoreRange(data, mask, ix_start, ix_end)
	}
}
/* StitchToPreviousBlock forwards to ha and then to hb. */
func (h *hashComposite) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		sub.StitchToPreviousBlock(num_bytes, position, ringbuffer, ring_buffer_mask)
	}
}
/* PrepareDistanceCache forwards to ha and then to hb. */
func (h *hashComposite) PrepareDistanceCache(distance_cache []int) {
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		sub.PrepareDistanceCache(distance_cache)
	}
}
/* FindLongestMatch runs the search in ha and then in hb. Both receive the same
   |out|, so hb starts from whatever ha left there. */
func (h *hashComposite) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
	for _, sub := range [...]hasherHandle{h.ha, h.hb} {
		sub.FindLongestMatch(dictionary, data, ring_buffer_mask, distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out)
	}
}
+252
View File
@@ -0,0 +1,252 @@
package brotli
import "encoding/binary"
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* HashTypeLength always returns 4; HashBytes reads a 32-bit word and
StitchToPreviousBlock compares num_bytes against this value minus one. */
func (*hashForgetfulChain) HashTypeLength() uint {
return 4
}
/* StoreLookahead always returns 4. */
func (*hashForgetfulChain) StoreLookahead() uint {
return 4
}
/* HashBytes picks the bucket for the four bytes at the start of |data|.

   The little-endian 32-bit word is multiplied by kHashMul32 and only the top
   bucketBits bits of the product are kept, because the high bits are the
   best mixed. */
func (h *hashForgetfulChain) HashBytes(data []byte) uint {
	word := binary.LittleEndian.Uint32(data)
	mixed := word * kHashMul32
	return uint(mixed >> (32 - h.bucketBits))
}
/* slot is one chain node stored in a bank. */
type slot struct {
/* Distance to the previous position with the same key, clamped to 0xFFFF by Store. */
delta uint16
/* Bank index of the next (older) node in the chain. */
next uint16
}
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
Hashes are stored in chains which are bucketed to groups. Group of chains
share a storage "bank". When more than "bank size" chain nodes are added,
oldest nodes are replaced; this way several chains may share a tail. */
type hashForgetfulChain struct {
hasherCommon
/* Number of hash bits; addr and head hold 1 << bucketBits entries. */
bucketBits uint
/* Number of banks; Store selects one with key & (numBanks - 1). */
numBanks uint
/* Each bank holds 1 << bankBits slots. */
bankBits uint
/* How many distance-cache entries FindLongestMatch tries first. */
numLastDistancesToCheck int
/* Per key: last stored position, truncated to 32 bits. */
addr []uint32
/* Per key: bank index of the newest node in the chain. */
head []uint16
/* Low byte of the key, indexed by the low 16 bits of the position. */
tiny_hash [65536]byte
banks [][]slot
/* Per bank: running counter that selects the next slot to overwrite. */
free_slot_idx []uint16
/* Upper bound on chain nodes visited per search; set by Initialize. */
max_hops uint
}
/* Initialize derives max_hops from the encoder quality and allocates the
   addr/head tables, the banks and the per-bank free-slot counters. */
func (h *hashForgetfulChain) Initialize(params *encoderParams) {
	base := uint(8)
	if params.quality > 6 {
		base = 7
	}
	h.max_hops = base << uint(params.quality-4)

	slotsPerBank := 1 << h.bankBits
	bucketCount := 1 << h.bucketBits
	h.addr = make([]uint32, bucketCount)
	h.head = make([]uint16, bucketCount)

	h.banks = make([][]slot, h.numBanks)
	for b := range h.banks {
		h.banks[b] = make([]slot, slotsPerBank)
	}
	h.free_slot_idx = make([]uint16, h.numBanks)
}
/* Prepare resets the hasher's tables before new input is processed.

   For a small one-shot input only the buckets the input can hash to are
   reset; upstream notes that this costs far more per entry than a bulk fill,
   hence the size threshold. Otherwise the whole |addr| array is filled with
   0xCCCCCCCC and |head| is zeroed. Because of wrapping, a position processed
   by the hasher never reaches 3GB + 64M, so every new chain terminates after
   its first node. tiny_hash and the free-slot counters are always cleared. */
func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) {
	threshold := (uint(1) << h.bucketBits) >> 6

	if !one_shot || input_size > threshold {
		for k := range h.addr {
			h.addr[k] = 0xCCCCCCCC
		}
		for k := range h.head {
			h.head[k] = 0
		}
	} else {
		for pos := uint(0); pos < input_size; pos++ {
			bucket := h.HashBytes(data[pos:])
			h.addr[bucket] = 0xCCCCCCCC
			h.head[bucket] = 0xCCCC
		}
	}

	h.tiny_hash = [65536]byte{}
	for b := range h.free_slot_idx {
		h.free_slot_idx[b] = 0
	}
}
/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
node to corresponding chain; also update tiny_hash for current position. */
func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) {
var key uint = h.HashBytes(data[ix&mask:])
var bank uint = key & (h.numBanks - 1)
/* Take the next slot of the bank; the counter wraps, so the oldest node is overwritten. */
idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1)
h.free_slot_idx[bank]++
/* Distance back to the previous position stored under this key. */
var delta uint = ix - uint(h.addr[key])
h.tiny_hash[uint16(ix)] = byte(key)
/* The slot stores delta in 16 bits, so clamp it. */
if delta > 0xFFFF {
delta = 0xFFFF
}
h.banks[bank][idx].delta = uint16(delta)
h.banks[bank][idx].next = h.head[key]
/* The new node becomes the head of the chain for this key. */
h.addr[key] = uint32(ix)
h.head[key] = uint16(idx)
}
/* StoreRange stores every position in [ix_start, ix_end). */
func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for pos := ix_start; pos < ix_end; pos++ {
		h.Store(data, mask, pos)
	}
}
/* StitchToPreviousBlock stores the hashes of the last three positions of the
   previous write. They could not be computed earlier because they need bytes
   from both the previous and the current block. Nothing happens when the new
   block is shorter than HashTypeLength()-1 or fewer than three bytes precede
   it. */
func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}
	for back := uint(3); back >= 1; back-- {
		h.Store(ringbuffer, ring_buffer_mask, position-back)
	}
}
/* PrepareDistanceCache hands |distance_cache| to the package-level
prepareDistanceCache together with numLastDistancesToCheck. */
func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) {
prepareDistanceCache(distance_cache, h.numLastDistancesToCheck)
}
/* Find a longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
REQUIRES: PrepareDistanceCache must be invoked for current distance cache
values; if this method is invoked repeatedly with the same distance
cache values, it is enough to invoke PrepareDistanceCache once.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found.
If neither search phase improved the score, the static dictionary is
searched as a last step. */
func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
var cur_ix_masked uint = cur_ix & ring_buffer_mask
var min_score uint = out.score
var best_score uint = out.score
var best_len uint = out.len
var key uint = h.HashBytes(data[cur_ix_masked:])
var tiny_hash byte = byte(key)
/* Don't accept a short copy from far away. */
out.len = 0
out.len_code_delta = 0
/* Try last distance first. */
for i := 0; i < h.numLastDistancesToCheck; i++ {
var backward uint = uint(distance_cache[i])
var prev_ix uint = (cur_ix - backward)
/* For distance code 0 we want to consider 2-byte matches. */
if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash {
continue
}
/* prev_ix >= cur_ix covers a zero distance and unsigned wrap-around. */
if prev_ix >= cur_ix || backward > max_backward {
continue
}
prev_ix &= ring_buffer_mask
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 2 {
var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
if best_score < score {
/* Cache entries other than the first carry a penalty. */
if i != 0 {
score -= backwardReferencePenaltyUsingLastDistance(uint(i))
}
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
}
/* Walk the hash chain for this key, visiting at most max_hops nodes. */
{
var bank uint = key & (h.numBanks - 1)
var backward uint = 0
var hops uint = h.max_hops
var delta uint = cur_ix - uint(h.addr[key])
var slot uint = uint(h.head[key])
for {
tmp6 := hops
hops--
if tmp6 == 0 {
break
}
var prev_ix uint
var last uint = slot
/* Distances accumulate along the chain. */
backward += delta
if backward > max_backward {
break
}
prev_ix = (cur_ix - backward) & ring_buffer_mask
slot = uint(h.banks[bank][last].next)
delta = uint(h.banks[bank][last].delta)
/* Cheap reject: a candidate must agree at offset best_len to beat the current best. */
if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
continue
}
{
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
/* Comparing for >= 3 does not change the semantics, but just saves
for a few unnecessary binary logarithms in backward reference
score, since we are not interested in such short matches. */
var score uint = backwardReferenceScore(uint(len), backward)
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = best_score
}
}
}
}
/* Record the current position only after the chain has been searched. */
h.Store(data, ring_buffer_mask, cur_ix)
}
if out.score == min_score {
searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
}
}
+214
View File
@@ -0,0 +1,214 @@
package brotli
import "encoding/binary"
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
and HTML inputs. */
/* HashTypeLength always returns 8; HashBytes reads a 64-bit word. */
func (*hashLongestMatchQuickly) HashTypeLength() uint {
return 8
}
/* StoreLookahead always returns 8. */
func (*hashLongestMatchQuickly) StoreLookahead() uint {
return 8
}
/* HashBytes picks the bucket for the bytes at the start of |data|.

   A little-endian 64-bit word is read and shifted left so that only its
   lowest hashLen bytes remain, then multiplied by kHashMul64. The top
   bucketBits bits of the product form the key, because the high bits are the
   best mixed. The HashLongestMatch and hashLongestMatchQuickly classes have
   separate, different implementations of hashing. */
func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
	word := binary.LittleEndian.Uint64(data)
	kept := word << (64 - 8*h.hashLen)
	mixed := kept * kHashMul64
	return uint32(mixed >> (64 - h.bucketBits))
}
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size: Initialize allocates
(1 << bucketBits) + bucketSweep entries. Starting from the index given by
the key, bucketSweep consecutive entries are used to store values of a key. */
type hashLongestMatchQuickly struct {
hasherCommon
/* Number of key bits produced by HashBytes. */
bucketBits uint
/* Number of consecutive entries examined/used per key. */
bucketSweep int
/* Number of input bytes that contribute to the hash. */
hashLen uint
/* Whether FindLongestMatch falls back to the static dictionary. */
useDictionary bool
/* Stored positions, truncated to 32 bits. */
buckets []uint32
}
/* Initialize allocates the bucket table: one entry per possible key plus
   bucketSweep extra entries so that a sweep starting at the last key stays
   in bounds. */
func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) {
	entries := 1<<h.bucketBits + h.bucketSweep
	h.buckets = make([]uint32, entries)
}
/* Prepare zeroes the bucket table before new input is processed.

   For a small one-shot input only the entries the input can hash to are
   cleared; upstream notes that this costs far more per entry than a bulk
   clear, hence the size threshold. Clearing is not strictly necessary, but
   stale entries would make the compression result stochastic (yet still
   correct), since random data would occasionally yield good backward
   references. */
func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
	threshold := (uint(4) << h.bucketBits) >> 7

	if !one_shot || input_size > threshold {
		for k := range h.buckets {
			h.buckets[k] = 0
		}
		return
	}

	for pos := uint(0); pos < input_size; pos++ {
		key := h.HashBytes(data[pos:])
		for j := 0; j < h.bucketSweep; j++ {
			h.buckets[key+uint32(j)] = 0
		}
	}
}
/* Store hashes the bytes at &data[ix & mask] and records |ix| in one of the
   bucketSweep entries that belong to the resulting key. The entry is chosen
   from the position itself ((ix >> 3) mod bucketSweep), which spreads
   successive stores over the sweep range. */
func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
	key := h.HashBytes(data[ix&mask:])
	wiggle := uint32(ix>>3) % uint32(h.bucketSweep)
	h.buckets[key+wiggle] = uint32(ix)
}
/* StoreRange stores every position in [ix_start, ix_end). */
func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for pos := ix_start; pos < ix_end; pos++ {
		h.Store(data, mask, pos)
	}
}
/* StitchToPreviousBlock stores the hashes of the last three positions of the
   previous write. They could not be computed earlier because they need bytes
   from both the previous and the current block. Nothing happens when the new
   block is shorter than HashTypeLength()-1 or fewer than three bytes precede
   it. */
func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}
	for back := uint(3); back >= 1; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
/* PrepareDistanceCache is a no-op for this hasher; FindLongestMatch only reads
distance_cache[0]. */
func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
}
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
up to the length of max_length and stores the position cur_ix in the
hash table.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found.
The most recent distance (distance_cache[0]) is tried first, then the
bucket entries for the current key, and finally - when useDictionary is set
and nothing improved the score - the static dictionary. */
func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
var best_len_in uint = out.len
var cur_ix_masked uint = cur_ix & ring_buffer_mask
var key uint32 = h.HashBytes(data[cur_ix_masked:])
/* Byte a candidate must match at offset best_len to be able to beat the current best. */
var compare_char int = int(data[cur_ix_masked+best_len_in])
var min_score uint = out.score
var best_score uint = out.score
var best_len uint = best_len_in
var cached_backward uint = uint(distance_cache[0])
var prev_ix uint = cur_ix - cached_backward
var bucket []uint32
out.len_code_delta = 0
/* Try the last distance first; prev_ix < cur_ix rejects zero and wrapped distances. */
if prev_ix < cur_ix {
prev_ix &= uint(uint32(ring_buffer_mask))
if compare_char == int(data[prev_ix+best_len]) {
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
if best_score < score {
best_score = score
best_len = uint(len)
out.len = uint(len)
out.distance = cached_backward
out.score = best_score
compare_char = int(data[cur_ix_masked+best_len])
/* With a single entry per key, a last-distance hit ends the search. */
if h.bucketSweep == 1 {
h.buckets[key] = uint32(cur_ix)
return
}
}
}
}
}
if h.bucketSweep == 1 {
var backward uint
var len uint
/* Only one to look for, don't bother to prepare for a loop. */
prev_ix = uint(h.buckets[key])
h.buckets[key] = uint32(cur_ix)
backward = cur_ix - prev_ix
prev_ix &= uint(uint32(ring_buffer_mask))
if compare_char != int(data[prev_ix+best_len_in]) {
return
}
if backward == 0 || backward > max_backward {
return
}
len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
var score uint = backwardReferenceScore(uint(len), backward)
if best_score < score {
out.len = uint(len)
out.distance = backward
out.score = score
return
}
}
} else {
bucket = h.buckets[key:]
var i int
prev_ix = uint(bucket[0])
bucket = bucket[1:]
/* The post statement advances i and loads the next bucket entry into prev_ix
(a mechanical translation of a C comma expression). */
for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
var backward uint = cur_ix - prev_ix
var len uint
prev_ix &= uint(uint32(ring_buffer_mask))
if compare_char != int(data[prev_ix+best_len]) {
continue
}
if backward == 0 || backward > max_backward {
continue
}
len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
if len >= 4 {
var score uint = backwardReferenceScore(uint(len), backward)
if best_score < score {
best_score = score
best_len = uint(len)
out.len = best_len
out.distance = backward
out.score = score
compare_char = int(data[cur_ix_masked+best_len])
}
}
}
}
if h.useDictionary && min_score == out.score {
searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
}
/* Record the current position, using the same entry selection as Store. */
h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
}
+168
View File
@@ -0,0 +1,168 @@
package brotli
/* Copyright 2018 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* NOTE: this hasher does not search in the dictionary. It is used as
backup-hasher, the main hasher already searches in it. */
/* Multiplier of the rolling hash; Initialize copies it into hashRolling.factor. */
const kRollingHashMul32 uint32 = 69069
/* Table value marking an entry that holds no position. */
const kInvalidPosHashRolling uint32 = 0xffffffff
/* This hasher uses a longer forward length, but returning a higher value here
will hurt compression by the main hasher when combined with a composite
hasher. The hasher tests for forward itself instead (FindLongestMatch
returns early when max_length < 32). */
func (*hashRolling) HashTypeLength() uint {
return 4
}
/* StoreLookahead always returns 4. */
func (*hashRolling) StoreLookahead() uint {
return 4
}
/* Computes a code from a single byte. A lookup table of 256 values could be
used, but simply adding 1 works about as well. The result is in [1, 256]. */
func (*hashRolling) HashByte(b byte) uint32 {
return uint32(b) + 1
}
/* HashRollingFunctionInitial extends the hash |state| by one byte while the
   window is still being filled: the state is scaled by |factor| and the code
   of |add| is added. Arithmetic wraps at 32 bits. */
func (h *hashRolling) HashRollingFunctionInitial(state uint32, add byte, factor uint32) uint32 {
	scaled := factor * state
	return scaled + h.HashByte(add)
}
/* HashRollingFunction slides the window by one step: the state is scaled by
   |factor|, the code of the incoming byte |add| is added and the contribution
   of the outgoing byte |rem| (its code times |factor_remove|) is subtracted.
   Arithmetic wraps at 32 bits. */
func (h *hashRolling) HashRollingFunction(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 {
	incoming := h.HashByte(add)
	outgoing := factor_remove * h.HashByte(rem)
	return factor*state + incoming - outgoing
}
/* Rolling hash for long distance long string matches. Stores one position
per bucket, bucket key is computed over a long region. */
type hashRolling struct {
hasherCommon
/* Step between hashed positions; the code masks positions with jump-1. */
jump int
/* Current rolling hash value. */
state uint32
/* Last position seen per hash code, or kInvalidPosHashRolling. */
table []uint32
/* Next position FindLongestMatch will feed into the rolling hash. */
next_ix uint
/* Multiplier applied to the state on every step. */
factor uint32
/* Multiplier used to remove the oldest byte from the state. */
factor_remove uint32
}
/* Initialize resets the rolling state, derives the removal factor and
   allocates the position table with every entry marked invalid.

   factor_remove is factor raised to the number of steps in the 32-byte
   window; the multiplications rely on 32-bit wrap-around. */
func (h *hashRolling) Initialize(params *encoderParams) {
	h.state, h.next_ix = 0, 0
	h.factor = kRollingHashMul32

	h.factor_remove = 1
	for step := 0; step < 32; step += h.jump {
		h.factor_remove *= h.factor
	}

	h.table = make([]uint32, 16777216)
	for slot := range h.table {
		h.table[slot] = kInvalidPosHashRolling
	}
}
/* Prepare seeds the rolling hash from the first 32 bytes of |data|, sampling
   every jump-th byte. Inputs shorter than 32 bytes are too small for this
   hasher and leave the state untouched. */
func (h *hashRolling) Prepare(one_shot bool, input_size uint, data []byte) {
	if input_size < 32 {
		return
	}

	h.state = 0
	for offset := 0; offset < 32; offset += h.jump {
		h.state = h.factor*h.state + h.HashByte(data[offset])
	}
}
/* Store is a no-op; this hasher only records positions inside FindLongestMatch. */
func (*hashRolling) Store(data []byte, mask uint, ix uint) {
}
/* StoreRange is a no-op; this hasher only records positions inside FindLongestMatch. */
func (*hashRolling) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
}
/* StitchToPreviousBlock re-initializes the rolling hash from scratch at the
   current position.

   The position is first advanced to the next multiple of jump, shrinking the
   number of usable bytes accordingly. Wrapping around the ring buffer is not
   handled, so the usable byte count is also limited to what remains before
   the end of the buffer. */
func (h *hashRolling) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
	usable := num_bytes
	if misalign := position & uint(h.jump-1); misalign != 0 {
		skip := uint(h.jump) - misalign
		if skip > usable {
			usable = 0
		} else {
			usable -= skip
		}
		position += skip
	}

	start := position & ring_buffer_mask
	if room := ring_buffer_mask - start; usable > room {
		usable = room
	}

	h.Prepare(false, usable, ringbuffer[start:])
	h.next_ix = position
}
/* PrepareDistanceCache is a no-op; this hasher does not read the distance cache. */
func (*hashRolling) PrepareDistanceCache(distance_cache []int) {
}
/* FindLongestMatch rolls the hash forward from next_ix up to cur_ix, recording
each visited position in the table, and reports a match for cur_ix when the
table already held an earlier position with the same code.
Positions that are not a multiple of jump, and calls with fewer than 32
bytes of lookahead, return immediately without touching the state.
|out| is only updated by a match that is both longer and better scored. */
func (h *hashRolling) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
var cur_ix_masked uint = cur_ix & ring_buffer_mask
var pos uint = h.next_ix
if cur_ix&uint(h.jump-1) != 0 {
return
}
/* Not enough lookahead */
if max_length < 32 {
return
}
for pos = h.next_ix; pos <= cur_ix; pos += uint(h.jump) {
/* Keep 30 bits of the state; only codes below the table size are stored. */
var code uint32 = h.state & ((16777216 * 64) - 1)
var rem byte = data[pos&ring_buffer_mask]
var add byte = data[(pos+32)&ring_buffer_mask]
var found_ix uint = uint(kInvalidPosHashRolling)
h.state = h.HashRollingFunction(h.state, add, rem, h.factor, h.factor_remove)
if code < 16777216 {
found_ix = uint(h.table[code])
h.table[code] = uint32(pos)
if pos == cur_ix && uint32(found_ix) != kInvalidPosHashRolling {
/* The cast to 32-bit makes backward distances up to 4GB work even
if cur_ix is above 4GB, despite using 32-bit values in the table. */
var backward uint = uint(uint32(cur_ix - found_ix))
if backward <= max_backward {
var found_ix_masked uint = found_ix & ring_buffer_mask
var len uint = findMatchLengthWithLimit(data[found_ix_masked:], data[cur_ix_masked:], max_length)
if len >= 4 && len > out.len {
var score uint = backwardReferenceScore(uint(len), backward)
if score > out.score {
out.len = uint(len)
out.distance = backward
out.score = score
out.len_code_delta = 0
}
}
}
}
}
}
h.next_ix = cur_ix + uint(h.jump)
}
+226
View File
@@ -0,0 +1,226 @@
package brotli
import "math"
/* The number of distance symbols effectively used by "Large Window Brotli"
(32-bit). */
const numHistogramDistanceSymbols = 544
/* histogramLiteral counts occurrences of literal symbols. */
type histogramLiteral struct {
/* Occurrence count per symbol. */
data_ [numLiteralSymbols]uint32
/* Sum of all counts added so far. */
total_count_ uint
/* Reset to math.MaxFloat64 by histogramClearLiteral. */
bit_cost_ float64
}
/* histogramClearLiteral resets the histogram: all counts and the total become
   zero and the bit cost becomes math.MaxFloat64. */
func histogramClearLiteral(self *histogramLiteral) {
	*self = histogramLiteral{bit_cost_: math.MaxFloat64}
}
/* clearHistogramsLiteral resets the first |length| histograms of |array|. */
func clearHistogramsLiteral(array []histogramLiteral, length uint) {
	for k := uint(0); k < length; k++ {
		histogramClearLiteral(&array[k])
	}
}
/* histogramAddLiteral counts one occurrence of symbol |val|. */
func histogramAddLiteral(self *histogramLiteral, val uint) {
self.data_[val]++
self.total_count_++
}
/* histogramAddVectorLiteral counts the first |n| symbols of |p|. The total is
   increased by |n| before the individual symbols are counted. */
func histogramAddVectorLiteral(self *histogramLiteral, p []byte, n uint) {
	self.total_count_ += n
	for k := uint(0); k < n; k++ {
		self.data_[p[k]]++
	}
}
/* histogramAddHistogramLiteral adds the total and every per-symbol count of
   |v| to |self|. */
func histogramAddHistogramLiteral(self *histogramLiteral, v *histogramLiteral) {
	self.total_count_ += v.total_count_
	for sym := range self.data_ {
		self.data_[sym] += v.data_[sym]
	}
}
/* histogramDataSizeLiteral returns the number of symbols in a literal
histogram (numLiteralSymbols). */
func histogramDataSizeLiteral() uint {
return numLiteralSymbols
}
/* histogramCommand counts occurrences of command symbols. */
type histogramCommand struct {
/* Occurrence count per symbol. */
data_ [numCommandSymbols]uint32
/* Sum of all counts added so far. */
total_count_ uint
/* Reset to math.MaxFloat64 by histogramClearCommand. */
bit_cost_ float64
}
/* histogramClearCommand resets the histogram: all counts and the total become
   zero and the bit cost becomes math.MaxFloat64. */
func histogramClearCommand(self *histogramCommand) {
	*self = histogramCommand{bit_cost_: math.MaxFloat64}
}
/* clearHistogramsCommand resets the first |length| histograms of |array|. */
func clearHistogramsCommand(array []histogramCommand, length uint) {
	for k := uint(0); k < length; k++ {
		histogramClearCommand(&array[k])
	}
}
/* histogramAddCommand counts one occurrence of symbol |val|. */
func histogramAddCommand(self *histogramCommand, val uint) {
self.data_[val]++
self.total_count_++
}
/* histogramAddVectorCommand counts the first |n| symbols of |p|. The total is
   increased by |n| before the individual symbols are counted. */
func histogramAddVectorCommand(self *histogramCommand, p []uint16, n uint) {
	self.total_count_ += n
	for k := uint(0); k < n; k++ {
		self.data_[p[k]]++
	}
}
/* histogramAddHistogramCommand adds the total and every per-symbol count of
   |v| to |self|. */
func histogramAddHistogramCommand(self *histogramCommand, v *histogramCommand) {
	self.total_count_ += v.total_count_
	for sym := range self.data_ {
		self.data_[sym] += v.data_[sym]
	}
}
/* histogramDataSizeCommand returns the number of symbols in a command
histogram (numCommandSymbols). */
func histogramDataSizeCommand() uint {
return numCommandSymbols
}
/* histogramDistance counts occurrences of distance symbols. */
type histogramDistance struct {
/* Occurrence count per symbol. */
data_ [numDistanceSymbols]uint32
/* Sum of all counts added so far. */
total_count_ uint
/* Reset to math.MaxFloat64 by histogramClearDistance. */
bit_cost_ float64
}
/* histogramClearDistance resets the histogram: all counts and the total become
   zero and the bit cost becomes math.MaxFloat64. */
func histogramClearDistance(self *histogramDistance) {
	*self = histogramDistance{bit_cost_: math.MaxFloat64}
}
/* clearHistogramsDistance resets the first |length| histograms of |array|. */
func clearHistogramsDistance(array []histogramDistance, length uint) {
	for k := uint(0); k < length; k++ {
		histogramClearDistance(&array[k])
	}
}
/* histogramAddDistance counts one occurrence of symbol |val|. */
func histogramAddDistance(self *histogramDistance, val uint) {
self.data_[val]++
self.total_count_++
}
/* histogramAddVectorDistance counts the first |n| symbols of |p|. The total is
   increased by |n| before the individual symbols are counted. */
func histogramAddVectorDistance(self *histogramDistance, p []uint16, n uint) {
	self.total_count_ += n
	for k := uint(0); k < n; k++ {
		self.data_[p[k]]++
	}
}
/* histogramAddHistogramDistance adds the total and every per-symbol count of
   |v| to |self|. */
func histogramAddHistogramDistance(self *histogramDistance, v *histogramDistance) {
	self.total_count_ += v.total_count_
	for sym := range self.data_ {
		self.data_[sym] += v.data_[sym]
	}
}
/* histogramDataSizeDistance returns the number of symbols in a distance
histogram (numDistanceSymbols). */
func histogramDataSizeDistance() uint {
return numDistanceSymbols
}
/* blockSplitIterator walks a blockSplit one symbol at a time. */
type blockSplitIterator struct {
/* The split being iterated. */
split_ *blockSplit
/* Index of the current block within the split. */
idx_ uint
/* Type of the current block. */
type_ uint
/* Symbols remaining in the current block. */
length_ uint
}
/* initBlockSplitIterator points the iterator at the first block of |split|.
   The remaining length is the first block's length, or 0 when the split has
   no blocks. */
func initBlockSplitIterator(self *blockSplitIterator, split *blockSplit) {
	self.split_, self.idx_, self.type_ = split, 0, 0

	remaining := uint(0)
	if len(split.lengths) > 0 {
		remaining = uint(split.lengths[0])
	}
	self.length_ = remaining
}
/* blockSplitIteratorNext consumes one symbol. When the current block is
   exhausted it first moves to the next block and loads that block's type and
   length. */
func blockSplitIteratorNext(self *blockSplitIterator) {
	if self.length_ == 0 {
		self.idx_++
		split := self.split_
		self.type_ = uint(split.types[self.idx_])
		self.length_ = uint(split.lengths[self.idx_])
	}
	self.length_--
}
/* buildHistogramsWithContext walks |cmds| and fills the three histogram sets.
Each command adds its prefix to the insert-and-copy histogram selected by the
current block type. Each inserted literal is added to the literal histogram
selected by the literal block type, combined with a context id when
|context_modes| is non-nil. Commands with a non-zero copy length and
cmd_prefix_ >= 128 add their distance prefix to a distance histogram.
|prev_byte| and |prev_byte2| are the two bytes preceding |start_pos|. */
func buildHistogramsWithContext(cmds []command, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit, ringbuffer []byte, start_pos uint, mask uint, prev_byte byte, prev_byte2 byte, context_modes []int, literal_histograms []histogramLiteral, insert_and_copy_histograms []histogramCommand, copy_dist_histograms []histogramDistance) {
var pos uint = start_pos
var literal_it blockSplitIterator
var insert_and_copy_it blockSplitIterator
var dist_it blockSplitIterator
initBlockSplitIterator(&literal_it, literal_split)
initBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split)
initBlockSplitIterator(&dist_it, dist_split)
for i := range cmds {
var cmd *command = &cmds[i]
var j uint
blockSplitIteratorNext(&insert_and_copy_it)
histogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_], uint(cmd.cmd_prefix_))
/* TODO: unwrap iterator blocks. */
for j = uint(cmd.insert_len_); j != 0; j-- {
var context uint
blockSplitIteratorNext(&literal_it)
context = literal_it.type_
if context_modes != nil {
/* Histogram index = block type shifted left, plus the context id of the two previous bytes. */
var lut contextLUT = getContextLUT(context_modes[context])
context = (context << literalContextBits) + uint(getContext(prev_byte, prev_byte2, lut))
}
histogramAddLiteral(&literal_histograms[context], uint(ringbuffer[pos&mask]))
prev_byte2 = prev_byte
prev_byte = ringbuffer[pos&mask]
pos++
}
/* Skip over the copied bytes; the literal context continues from the end of the copy. */
pos += uint(commandCopyLen(cmd))
if commandCopyLen(cmd) != 0 {
prev_byte2 = ringbuffer[(pos-2)&mask]
prev_byte = ringbuffer[(pos-1)&mask]
if cmd.cmd_prefix_ >= 128 {
var context uint
blockSplitIteratorNext(&dist_it)
context = uint(uint32(dist_it.type_<<distanceContextBits) + commandDistanceContext(cmd))
histogramAddDistance(&copy_dist_histograms[context], uint(cmd.dist_prefix_)&0x3FF)
}
}
}
}
+184
View File
@@ -0,0 +1,184 @@
package brotli
import (
"compress/gzip"
"io"
"net/http"
"strings"
)
// HTTPCompressor picks brotli, gzip or no compression according to the
// request's Accept-Encoding header, sets the Content-Encoding response header
// to match, and returns a WriteCloser that applies the chosen compression.
// A Vary: Accept-Encoding header is added when the response has no Vary header
// yet. The returned writer's Close method must be called before the current
// HTTP handler returns.
func HTTPCompressor(w http.ResponseWriter, r *http.Request) io.WriteCloser {
	if w.Header().Get("Vary") == "" {
		w.Header().Set("Vary", "Accept-Encoding")
	}

	chosen := negotiateContentEncoding(r, []string{"br", "gzip"})
	if chosen == "br" {
		w.Header().Set("Content-Encoding", "br")
		return NewWriterV2(w, DefaultCompression)
	}
	if chosen == "gzip" {
		w.Header().Set("Content-Encoding", "gzip")
		return gzip.NewWriter(w)
	}
	return nopCloser{w}
}
// negotiateContentEncoding returns the best offered content encoding for the
// request's Accept-Encoding header. If two offers match with equal weight,
// the offer earlier in the list is preferred. If nothing in the header matches
// an offer, "identity" is returned; if the best match has a weight of zero,
// "" is returned.
func negotiateContentEncoding(r *http.Request, offers []string) string {
	specs := parseAccept(r.Header, "Accept-Encoding")

	chosen, chosenQ := "identity", -1.0
	for _, offer := range offers {
		for _, spec := range specs {
			if spec.Value != "*" && spec.Value != offer {
				continue
			}
			// Strictly greater, so earlier offers win ties.
			if spec.Q > chosenQ {
				chosen, chosenQ = offer, spec.Q
			}
		}
	}

	if chosenQ == 0 {
		return ""
	}
	return chosen
}
// acceptSpec describes one element of an Accept* header.
type acceptSpec struct {
// Value is the element's token (it may contain '/').
Value string
// Q is the element's quality value; parseAccept defaults it to 1.0.
Q float64
}
// parseAccept parses Accept* headers.
//
// Every value stored under key is split into comma-separated elements, each a
// token optionally followed by ";q=<quality>". Parsing of a header value stops
// at the first malformed element; the elements collected before it are kept.
func parseAccept(header http.Header, key string) (specs []acceptSpec) {
loop:
for _, s := range header[key] {
for {
var spec acceptSpec
spec.Value, s = expectTokenSlash(s)
// No token: end of this header value, or a malformed element.
if spec.Value == "" {
continue loop
}
spec.Q = 1.0
s = skipSpace(s)
if strings.HasPrefix(s, ";") {
s = skipSpace(s[1:])
// "q=" is the only parameter understood here.
if !strings.HasPrefix(s, "q=") {
continue loop
}
spec.Q, s = expectQuality(s[2:])
// expectQuality signals a malformed value with a negative result.
if spec.Q < 0.0 {
continue loop
}
}
specs = append(specs, spec)
s = skipSpace(s)
if !strings.HasPrefix(s, ",") {
continue loop
}
s = skipSpace(s[1:])
}
}
return
}
// skipSpace returns s without its leading run of octets flagged isSpace.
func skipSpace(s string) (rest string) {
	for pos := 0; pos < len(s); pos++ {
		if octetTypes[s[pos]]&isSpace == 0 {
			return s[pos:]
		}
	}
	return ""
}
// expectTokenSlash splits s into its longest leading run of token octets and
// '/' characters, and the remainder. The token is empty when s does not start
// with such an octet.
func expectTokenSlash(s string) (token, rest string) {
	end := 0
	for end < len(s) {
		c := s[end]
		if c != '/' && octetTypes[c]&isToken == 0 {
			break
		}
		end++
	}
	return s[:end], s[end:]
}
func expectQuality(s string) (q float64, rest string) {
switch {
case len(s) == 0:
return -1, ""
case s[0] == '0':
q = 0
case s[0] == '1':
q = 1
default:
return -1, ""
}
s = s[1:]
if !strings.HasPrefix(s, ".") {
return q, s
}
s = s[1:]
i := 0
n := 0
d := 1
for ; i < len(s); i++ {
b := s[i]
if b < '0' || b > '9' {
break
}
n = n*10 + int(b) - '0'
d *= 10
}
return q + float64(n)/float64(d), s[i:]
}
// Octet types from RFC 2616.
// octetTypes maps every byte value to its classification flags; it is filled
// in by init.
var octetTypes [256]octetType
// octetType is a bit set of the classes an octet belongs to.
type octetType byte
const (
// isToken marks octets that may appear in a token.
isToken octetType = 1 << iota
// isSpace marks space, tab, CR and LF.
isSpace
)
func init() {
	// Classification follows the RFC 2616 grammar:
	//
	// OCTET      = <any 8-bit sequence of data>
	// CHAR       = <any US-ASCII character (octets 0 - 127)>
	// CTL        = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
	// CR         = <US-ASCII CR, carriage return (13)>
	// LF         = <US-ASCII LF, linefeed (10)>
	// SP         = <US-ASCII SP, space (32)>
	// HT         = <US-ASCII HT, horizontal-tab (9)>
	// <">        = <US-ASCII double-quote mark (34)>
	// CRLF       = CR LF
	// LWS        = [CRLF] 1*( SP | HT )
	// TEXT       = <any OCTET except CTLs, but including LWS>
	// separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <">
	//              | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT
	// token      = 1*<any CHAR except CTLs or separators>
	// qdtext     = <any TEXT except <">>
	const (
		separators = " \t\"(),/:;<=>?@[]\\{}"
		whitespace = " \t\r\n"
	)
	for c := 0; c < 256; c++ {
		var flags octetType
		if strings.ContainsRune(whitespace, rune(c)) {
			flags |= isSpace
		}
		// A CHAR that is not a CTL is exactly the range 32..126.
		visibleASCII := c > 31 && c < 127
		if visibleASCII && !strings.ContainsRune(separators, rune(c)) {
			flags |= isToken
		}
		octetTypes[c] = flags
	}
}
+653
View File
@@ -0,0 +1,653 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Utilities for building Huffman decoding tables. */
/* Maximum length, in bits, of a Huffman code. */
const huffmanMaxCodeLength = 15
/* Maximum possible Huffman table size for an alphabet size of (index * 32),
max code length 15 and root table bits 8. The table has 37 entries, i.e. it
covers indices 0 through 36. */
var kMaxHuffmanTableSize = []uint16{
256,
402,
436,
468,
500,
534,
566,
598,
630,
662,
694,
726,
758,
790,
822,
854,
886,
920,
952,
984,
1016,
1048,
1080,
1112,
1144,
1176,
1208,
1240,
1272,
1304,
1336,
1368,
1400,
1432,
1464,
1496,
1528,
}
/* Maximum Huffman table size for the block-length alphabet
(BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26). */
const huffmanMaxSize26 = 396
/* Maximum Huffman table size for the block-type alphabet
(BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258). */
const huffmanMaxSize258 = 632
/* Maximum Huffman table size for the context-map alphabet
(BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272). */
const huffmanMaxSize272 = 646
/* Maximum length, in bits, of a code-length code. */
const huffmanMaxCodeLengthCodeLength = 5
/* One Huffman table entry.
Do not create this struct directly - use the constructHuffmanCode
constructor below! */
type huffmanCode struct {
bits byte
value uint16
}
/* constructHuffmanCode builds a huffmanCode from its two fields. */
func constructHuffmanCode(bits byte, value uint16) huffmanCode {
	return huffmanCode{bits: bits, value: value}
}
/* Builds Huffman lookup table assuming code lengths are in symbol order. */
/* Builds Huffman lookup table assuming code lengths are in symbol order.
Returns size of resulting table. */
/* Builds a simple Huffman table. The |num_symbols| parameter is to be
interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
4 means 4 symbols with lengths [1, 2, 3, 3]. */
/* Contains a collection of Huffman trees with the same alphabet size. */
/* max_symbol is needed due to simple codes since log2(alphabet_size) could be
greater than log2(max_symbol). */
type huffmanTreeGroup struct {
/* One table per tree. NOTE(review): presumably each is a sub-slice of codes - confirm. */
htrees [][]huffmanCode
codes []huffmanCode
alphabet_size uint16
max_symbol uint16
num_htrees uint16
}
const reverseBitsMax = 8
const reverseBitsBase = 0
var kReverseBits = [1 << reverseBitsMax]byte{
0x00,
0x80,
0x40,
0xC0,
0x20,
0xA0,
0x60,
0xE0,
0x10,
0x90,
0x50,
0xD0,
0x30,
0xB0,
0x70,
0xF0,
0x08,
0x88,
0x48,
0xC8,
0x28,
0xA8,
0x68,
0xE8,
0x18,
0x98,
0x58,
0xD8,
0x38,
0xB8,
0x78,
0xF8,
0x04,
0x84,
0x44,
0xC4,
0x24,
0xA4,
0x64,
0xE4,
0x14,
0x94,
0x54,
0xD4,
0x34,
0xB4,
0x74,
0xF4,
0x0C,
0x8C,
0x4C,
0xCC,
0x2C,
0xAC,
0x6C,
0xEC,
0x1C,
0x9C,
0x5C,
0xDC,
0x3C,
0xBC,
0x7C,
0xFC,
0x02,
0x82,
0x42,
0xC2,
0x22,
0xA2,
0x62,
0xE2,
0x12,
0x92,
0x52,
0xD2,
0x32,
0xB2,
0x72,
0xF2,
0x0A,
0x8A,
0x4A,
0xCA,
0x2A,
0xAA,
0x6A,
0xEA,
0x1A,
0x9A,
0x5A,
0xDA,
0x3A,
0xBA,
0x7A,
0xFA,
0x06,
0x86,
0x46,
0xC6,
0x26,
0xA6,
0x66,
0xE6,
0x16,
0x96,
0x56,
0xD6,
0x36,
0xB6,
0x76,
0xF6,
0x0E,
0x8E,
0x4E,
0xCE,
0x2E,
0xAE,
0x6E,
0xEE,
0x1E,
0x9E,
0x5E,
0xDE,
0x3E,
0xBE,
0x7E,
0xFE,
0x01,
0x81,
0x41,
0xC1,
0x21,
0xA1,
0x61,
0xE1,
0x11,
0x91,
0x51,
0xD1,
0x31,
0xB1,
0x71,
0xF1,
0x09,
0x89,
0x49,
0xC9,
0x29,
0xA9,
0x69,
0xE9,
0x19,
0x99,
0x59,
0xD9,
0x39,
0xB9,
0x79,
0xF9,
0x05,
0x85,
0x45,
0xC5,
0x25,
0xA5,
0x65,
0xE5,
0x15,
0x95,
0x55,
0xD5,
0x35,
0xB5,
0x75,
0xF5,
0x0D,
0x8D,
0x4D,
0xCD,
0x2D,
0xAD,
0x6D,
0xED,
0x1D,
0x9D,
0x5D,
0xDD,
0x3D,
0xBD,
0x7D,
0xFD,
0x03,
0x83,
0x43,
0xC3,
0x23,
0xA3,
0x63,
0xE3,
0x13,
0x93,
0x53,
0xD3,
0x33,
0xB3,
0x73,
0xF3,
0x0B,
0x8B,
0x4B,
0xCB,
0x2B,
0xAB,
0x6B,
0xEB,
0x1B,
0x9B,
0x5B,
0xDB,
0x3B,
0xBB,
0x7B,
0xFB,
0x07,
0x87,
0x47,
0xC7,
0x27,
0xA7,
0x67,
0xE7,
0x17,
0x97,
0x57,
0xD7,
0x37,
0xB7,
0x77,
0xF7,
0x0F,
0x8F,
0x4F,
0xCF,
0x2F,
0xAF,
0x6F,
0xEF,
0x1F,
0x9F,
0x5F,
0xDF,
0x3F,
0xBF,
0x7F,
0xFF,
}
const reverseBitsLowest = (uint64(1) << (reverseBitsMax - 1 + reverseBitsBase))
/* reverseBits8 returns the bit-wise reversal of the reverseBitsMax (8) least
   significant bits of num, looked up in kReverseBits.
   (In terms of the C original this is
   reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX).)
   num is used directly as the table index, so it must be smaller than
   1 << reverseBitsMax. */
func reverseBits8(num uint64) uint64 {
	reversed := kReverseBits[num]
	return uint64(reversed)
}
/* replicateValue stores code in table[0], table[step], table[2*step], ...,
   table[end-step], walking from the highest index down to 0.
   It assumes that end is a positive integer multiple of step; at least one
   slot (table[end-step]) is always written. */
func replicateValue(table []huffmanCode, step int, end int, code huffmanCode) {
	for slot := end - step; ; slot -= step {
		table[slot] = code
		if slot <= 0 {
			return
		}
	}
}
/* nextTableBitSize returns the width, in bits, of the next 2nd level table.
   count is the histogram of bit lengths for the remaining symbols, len is the
   code length of the next processed symbol and root_bits is the width of the
   root table. */
func nextTableBitSize(count []uint16, len int, root_bits int) int {
	/* Number of codes of the current length that are still unassigned. */
	remaining := 1 << uint(len-root_bits)
	for ; len < huffmanMaxCodeLength; len++ {
		remaining -= int(count[len])
		if remaining <= 0 {
			break
		}
		/* Every unassigned code gains one more bit at the next length. */
		remaining <<= 1
	}
	return len - root_bits
}
/* buildCodeLengthsHuffmanTable fills table with a single-level lookup table of
1 << huffmanMaxCodeLengthCodeLength entries for the code length alphabet.

code_lengths[s] is the bit length assigned to symbol s (0 for an unused
symbol); it is read for every s in [0, codeLengthCodes).
count[n] is the number of symbols whose length is n; it is read for n in
[1, huffmanMaxCodeLengthCodeLength].
Nothing is returned; the result is written into table. */
func buildCodeLengthsHuffmanTable(table []huffmanCode, code_lengths []byte, count []uint16) {
var code huffmanCode /* current table entry */
var symbol int /* symbol index in original or sorted table */
var key uint64 /* prefix code */
var key_step uint64 /* prefix code addend */
var step int /* step size to replicate values in current table */
var table_size int /* size of current table */
var sorted [codeLengthCodes]int /* symbols sorted by code length */
var offset [huffmanMaxCodeLengthCodeLength + 1]int /* offsets in sorted table for each length */
var bits int
var bits_count int
/* reverseBits8 can only reverse reverseBitsMax bits. */
assert(huffmanMaxCodeLengthCodeLength <= reverseBitsMax)
/* Generate offsets into sorted symbol table by code length. */
/* offset[n] ends up as the index of the LAST slot reserved for length n. */
symbol = -1
bits = 1
var i int
for i = 0; i < huffmanMaxCodeLengthCodeLength; i++ {
symbol += int(count[bits])
offset[bits] = symbol
bits++
}
/* Symbols with code length 0 are placed after all other symbols. */
offset[0] = codeLengthCodes - 1
/* Sort symbols by length, by symbol order within each length. */
/* Symbols are visited from highest to lowest and each offset is decremented
after use, which leaves every length group in ascending symbol order. */
symbol = codeLengthCodes
for {
var i int
/* Six symbols are handled per outer pass; the exit test below only fires
when symbol lands exactly on 0, so this assumes codeLengthCodes is a
multiple of 6 - TODO confirm against its definition. */
for i = 0; i < 6; i++ {
symbol--
sorted[offset[code_lengths[symbol]]] = symbol
offset[code_lengths[symbol]]--
}
if symbol == 0 {
break
}
}
table_size = 1 << huffmanMaxCodeLengthCodeLength
/* Special case: all symbols but one have 0 code length. */
/* (offset[0] started at codeLengthCodes-1 and was decremented once per
zero-length symbol.) The whole table maps to that one symbol with 0 bits. */
if offset[0] == 0 {
code = constructHuffmanCode(0, uint16(sorted[0]))
for key = 0; key < uint64(table_size); key++ {
table[key] = code
}
return
}
/* Fill in table. */
/* key advances in bit-reversed order; step is the distance between the
table slots that share the current bits-bit prefix. */
key = 0
key_step = reverseBitsLowest
symbol = 0
bits = 1
step = 2
for {
for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
code = constructHuffmanCode(byte(bits), uint16(sorted[symbol]))
symbol++
replicateValue(table[reverseBits8(key):], step, table_size, code)
key += key_step
}
step <<= 1
key_step >>= 1
bits++
if bits > huffmanMaxCodeLengthCodeLength {
break
}
}
}
/* buildHuffmanTable builds a two-level Huffman lookup table in root_table and
returns the total number of entries used (root table plus all 2nd level
tables).

root_bits is the index width of the root table. symbol_lists is read through
symbolListGet; from the way it is indexed here it appears to hold one linked
list of symbols per code length, with list heads at negative indices
(length - (huffmanMaxCodeLength + 1)) and 0xFFFF marking an empty list -
TODO confirm against the symbolList definition. count[n] is the number of
symbols with code length n; entries above root_bits are decremented to zero
by this function. */
func buildHuffmanTable(root_table []huffmanCode, root_bits int, symbol_lists symbolList, count []uint16) uint32 {
var code huffmanCode /* current table entry */
var table []huffmanCode /* next available space in table */
var len int /* current code length */
var symbol int /* symbol index in original or sorted table */
var key uint64 /* prefix code */
var key_step uint64 /* prefix code addend */
var sub_key uint64 /* 2nd level table prefix code */
var sub_key_step uint64 /* 2nd level table prefix code addend */
var step int /* step size to replicate values in current table */
var table_bits int /* key length of current table */
var table_size int /* size of current table */
var total_size int /* sum of root table size and 2nd level table sizes */
var max_length int = -1
var bits int
var bits_count int
/* reverseBits8 can only reverse reverseBitsMax bits at a time. */
assert(root_bits <= reverseBitsMax)
assert(huffmanMaxCodeLength-root_bits <= reverseBitsMax)
/* Walk the list heads downwards from index -1 until a non-empty one is found,
then convert that index back into a code length. */
for symbolListGet(symbol_lists, max_length) == 0xFFFF {
max_length--
}
max_length += huffmanMaxCodeLength + 1
table = root_table
table_bits = root_bits
table_size = 1 << uint(table_bits)
total_size = table_size
/* Fill in the root table. Reduce the table size if possible,
and create the repetitions by copying. */
if table_bits > max_length {
table_bits = max_length
table_size = 1 << uint(table_bits)
}
key = 0
key_step = reverseBitsLowest
bits = 1
step = 2
for {
symbol = bits - (huffmanMaxCodeLength + 1)
for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
symbol = int(symbolListGet(symbol_lists, symbol))
code = constructHuffmanCode(byte(bits), uint16(symbol))
replicateValue(table[reverseBits8(key):], step, table_size, code)
key += key_step
}
step <<= 1
key_step >>= 1
bits++
if bits > table_bits {
break
}
}
/* If root_bits != table_bits then replicate to fill the remaining slots. */
for total_size != table_size {
copy(table[table_size:], table[:uint(table_size)])
table_size <<= 1
}
/* Fill in 2nd level tables and add pointers to root table. */
key_step = reverseBitsLowest >> uint(root_bits-1)
sub_key = reverseBitsLowest << 1
sub_key_step = reverseBitsLowest
len = root_bits + 1
step = 2
for ; len <= max_length; len++ {
symbol = len - (huffmanMaxCodeLength + 1)
for ; count[len] != 0; count[len]-- {
/* sub_key == reverseBitsLowest<<1 means the current 2nd level table is
full (or none has been started yet): begin a new one. */
if sub_key == reverseBitsLowest<<1 {
table = table[table_size:]
table_bits = nextTableBitSize(count, int(len), root_bits)
table_size = 1 << uint(table_bits)
total_size += table_size
sub_key = reverseBits8(key)
key += key_step
/* table is a reslice of root_table, so cap(root_table)-cap(table) is the
index at which the new table starts; the root entry stores that start
index minus the entry's own index (sub_key). */
root_table[sub_key] = constructHuffmanCode(byte(table_bits+root_bits), uint16(uint64(uint(-cap(table)+cap(root_table)))-sub_key))
sub_key = 0
}
symbol = int(symbolListGet(symbol_lists, symbol))
code = constructHuffmanCode(byte(len-root_bits), uint16(symbol))
replicateValue(table[reverseBits8(sub_key):], step, table_size, code)
sub_key += sub_key_step
}
step <<= 1
sub_key_step >>= 1
}
return uint32(total_size)
}
/* buildSimpleHuffmanTable builds a lookup table of 1 << root_bits entries for
   a "simple" Huffman code and returns that size.

   num_symbols selects the code shape: 0 means 1 symbol, 1 means 2 symbols,
   2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2] and
   4 means 4 symbols with lengths [1, 2, 3, 3]. Any other value leaves only
   the replication step to run.

   val holds the symbols. It may be reordered in place: cases 3 and 4 sort
   (part of) it so that equal-length codes are assigned in ascending symbol
   order. */
func buildSimpleHuffmanTable(table []huffmanCode, root_bits int, val []uint16, num_symbols uint32) uint32 {
	var filled uint32 = 1
	var wanted uint32 = 1 << uint(root_bits)

	switch num_symbols {
	case 0:
		table[0] = constructHuffmanCode(0, val[0])

	case 1:
		/* Two 1-bit codes, smaller symbol first. */
		lo, hi := val[0], val[1]
		if !(hi > lo) {
			lo, hi = hi, lo
		}
		table[0] = constructHuffmanCode(1, lo)
		table[1] = constructHuffmanCode(1, hi)
		filled = 2

	case 2:
		/* val[0] gets the 1-bit code; the other two share the 2-bit codes. */
		table[0] = constructHuffmanCode(1, val[0])
		table[2] = constructHuffmanCode(1, val[0])
		lo, hi := val[1], val[2]
		if !(hi > lo) {
			lo, hi = hi, lo
		}
		table[1] = constructHuffmanCode(2, lo)
		table[3] = constructHuffmanCode(2, hi)
		filled = 4

	case 3:
		/* Sort all four symbols ascending, in place. */
		for a := 0; a < 3; a++ {
			for b := a + 1; b < 4; b++ {
				if val[b] < val[a] {
					val[a], val[b] = val[b], val[a]
				}
			}
		}
		/* Table indices are bit-reversed codes, hence the 0, 2, 1, 3 order. */
		table[0] = constructHuffmanCode(2, val[0])
		table[2] = constructHuffmanCode(2, val[1])
		table[1] = constructHuffmanCode(2, val[2])
		table[3] = constructHuffmanCode(2, val[3])
		filled = 4

	case 4:
		/* Only the two 3-bit symbols need ordering. */
		if val[3] < val[2] {
			val[2], val[3] = val[3], val[2]
		}
		table[0] = constructHuffmanCode(1, val[0])
		table[1] = constructHuffmanCode(2, val[1])
		table[2] = constructHuffmanCode(1, val[0])
		table[3] = constructHuffmanCode(3, val[2])
		table[4] = constructHuffmanCode(1, val[0])
		table[5] = constructHuffmanCode(2, val[1])
		table[6] = constructHuffmanCode(1, val[0])
		table[7] = constructHuffmanCode(3, val[3])
		filled = 8
	}

	/* Double the filled prefix until the whole root table is covered. */
	for filled != wanted {
		copy(table[filled:], table[:uint(filled)])
		filled <<= 1
	}
	return wanted
}
+182
View File
@@ -0,0 +1,182 @@
package brotli
/* utf8Position classifies the byte that FOLLOWS c inside a UTF-8 sequence.
   last is the byte before c. The result is 0 when the next byte starts a new
   character ('Byte 1'), 1 when it is 'Byte 2' and 2 when it is 'Byte 3';
   non-zero results are limited to clamp. */
func utf8Position(last uint, c uint, clamp uint) uint {
	switch {
	case c < 128:
		/* Next one is the 'Byte 1' again. */
		return 0
	case c >= 192:
		/* Next one is the 'Byte 2' of utf-8 encoding. */
		return brotli_min_size_t(1, clamp)
	case last < 0xE0:
		/* c is a continuation byte; decide over the last byte whether it ends
		   the sequence. Here it does: completed two or three byte coding. */
		return 0
	default:
		/* Next one is the 'Byte 3' of utf-8 encoding. */
		return brotli_min_size_t(2, clamp)
	}
}
/* decideMultiByteStatsLevel picks the UTF-8 modelling level for the len bytes
   of the ring buffer data (indexed through mask) that start at pos.
   It returns 0 (plain single byte modelling) when fewer than 25 bytes sit at
   UTF-8 position 1 or 2, and 1 (2-byte modelling) otherwise. */
func decideMultiByteStatsLevel(pos uint, len uint, mask uint, data []byte) uint {
	var tally [3]uint /* bytes seen per UTF-8 position */
	var prev uint
	for off := uint(0); off < len; off++ {
		cur := uint(data[(pos+off)&mask])
		tally[utf8Position(prev, cur, 2)]++
		prev = cur
	}

	if tally[1]+tally[2] < 25 {
		return 0
	}
	/* The answer should be 2 when tally[2] >= 500, but 1 compresses better,
	   so level 2 is never selected. */
	return 1
}
/* estimateBitCostsForLiteralsUTF8 writes an estimated bit cost into cost[i]
for each of the len literals that start at pos in the ring buffer data
(indexed through mask).

Each estimate is based on byte frequencies in a sliding window of window_half
bytes on either side of the literal. One histogram is kept per UTF-8 position
(see utf8Position) so that lead bytes and continuation bytes are modelled
separately. */
func estimateBitCostsForLiteralsUTF8(pos uint, len uint, mask uint, data []byte, cost []float32) {
var max_utf8 uint = decideMultiByteStatsLevel(pos, uint(len), mask, data)
/* Bootstrap histograms. */
var histogram = [3][256]uint{[256]uint{0}}
var window_half uint = 495
var in_window uint = brotli_min_size_t(window_half, uint(len))
/* in_window_utf8[p] is the number of window bytes counted in histogram[p]. */
var in_window_utf8 = [3]uint{0}
/* max_utf8 is 0 (normal ASCII single byte modeling),
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
var i uint
{
var last_c uint = 0
var utf8_pos uint = 0
for i = 0; i < in_window; i++ {
var c uint = uint(data[(pos+i)&mask])
histogram[utf8_pos][c]++
in_window_utf8[utf8_pos]++
utf8_pos = utf8Position(last_c, c, max_utf8)
last_c = c
}
}
/* Compute bit costs with sliding window. */
for i = 0; i < len; i++ {
if i >= window_half {
/* c and last_c are the two bytes preceding the byte that leaves the
window; 0 stands in for bytes before the start of the input. */
var c uint
var last_c uint
if i < window_half+1 {
c = 0
} else {
c = uint(data[(pos+i-window_half-1)&mask])
}
if i < window_half+2 {
last_c = 0
} else {
last_c = uint(data[(pos+i-window_half-2)&mask])
}
/* Remove a byte in the past. */
var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
histogram[utf8_pos2][data[(pos+i-window_half)&mask]]--
in_window_utf8[utf8_pos2]--
}
if i+window_half < len {
/* c and last_c are the two bytes preceding the byte that enters the
window. */
var c uint = uint(data[(pos+i+window_half-1)&mask])
var last_c uint = uint(data[(pos+i+window_half-2)&mask])
/* Add a byte in the future. */
var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
histogram[utf8_pos2][data[(pos+i+window_half)&mask]]++
in_window_utf8[utf8_pos2]++
}
{
/* Cost of the literal at i, using the histogram for its UTF-8 position. */
var c uint
var last_c uint
if i < 1 {
c = 0
} else {
c = uint(data[(pos+i-1)&mask])
}
if i < 2 {
last_c = 0
} else {
last_c = uint(data[(pos+i-2)&mask])
}
var utf8_pos uint = utf8Position(last_c, c, max_utf8)
var masked_pos uint = (pos + i) & mask
var histo uint = histogram[utf8_pos][data[masked_pos]]
var lit_cost float64
/* Treat an unseen byte as seen once so the second logarithm below is
never taken of zero. */
if histo == 0 {
histo = 1
}
lit_cost = fastLog2(in_window_utf8[utf8_pos]) - fastLog2(histo)
lit_cost += 0.02905
/* Costs below one bit are moved halfway towards 1.0. */
if lit_cost < 1.0 {
lit_cost *= 0.5
lit_cost += 0.5
}
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if i < 2000 {
lit_cost += 0.7 - (float64(2000-i) / 2000.0 * 0.35)
}
cost[i] = float32(lit_cost)
}
}
}
/* estimateBitCostsForLiterals writes an estimated bit cost into cost[i] for
   each of the len literals that start at pos in the ring buffer data
   (indexed through mask).

   Input that isMostlyUTF8 accepts is delegated to
   estimateBitCostsForLiteralsUTF8. Everything else is modelled with a single
   byte histogram over a sliding window of 2000 bytes on either side of the
   literal. */
func estimateBitCostsForLiterals(pos uint, len uint, mask uint, data []byte, cost []float32) {
	if isMostlyUTF8(data, pos, mask, uint(len), kMinUTF8Ratio) {
		estimateBitCostsForLiteralsUTF8(pos, uint(len), mask, data, cost)
		return
	}

	const halfWindow uint = 2000
	var freq [256]uint
	var windowCount uint = brotli_min_size_t(halfWindow, uint(len))

	/* Bootstrap histogram. */
	for k := uint(0); k < windowCount; k++ {
		freq[data[(pos+k)&mask]]++
	}

	/* Compute bit costs with sliding window. */
	for k := uint(0); k < len; k++ {
		if k >= halfWindow {
			/* Remove a byte in the past. */
			freq[data[(pos+k-halfWindow)&mask]]--
			windowCount--
		}
		if ahead := k + halfWindow; ahead < len {
			/* Add a byte in the future. */
			freq[data[(pos+ahead)&mask]]++
			windowCount++
		}

		/* Treat an unseen byte as seen once. */
		seen := freq[data[(pos+k)&mask]]
		if seen == 0 {
			seen = 1
		}

		var litCost float64 = fastLog2(windowCount) - fastLog2(seen)
		litCost += 0.029
		/* Costs below one bit are moved halfway towards 1.0. */
		if litCost < 1.0 {
			litCost *= 0.5
			litCost += 0.5
		}
		cost[k] = float32(litCost)
	}
}
+45
View File
@@ -0,0 +1,45 @@
package matchfinder
// An absoluteMatch is like a Match, but it stores indexes into the byte
// stream instead of lengths. The zero value is used by callers in this
// package to mean "no match".
type absoluteMatch struct {
// Start is the index of the first byte.
Start int
// End is the index of the byte after the last byte
// (so that End - Start = Length).
End int
// Match is the index of the previous data that matches
// (Start - Match = Distance).
Match int
}
// A matchEmitter manages the output of matches for a MatchFinder.
// It converts absoluteMatch values into Match values, tracking how many
// bytes have been left unmatched since the previously emitted match.
type matchEmitter struct {
// Dst is the destination slice that Matches are added to.
Dst []Match
// NextEmit is the index of the next byte to emit, i.e. the first byte
// not yet covered by anything appended to Dst.
NextEmit int
}
// emit appends m to e.Dst as a Match whose Unmatched count covers the bytes
// between e.NextEmit and m.Start, then advances e.NextEmit to m.End.
func (e *matchEmitter) emit(m absoluteMatch) {
	converted := Match{
		Unmatched: m.Start - e.NextEmit,
		Length:    m.End - m.Start,
		Distance:  m.Start - m.Match,
	}
	e.Dst = append(e.Dst, converted)
	e.NextEmit = m.End
}
// trim clips m so that it does not extend past maxEnd and emits the result
// if it is still at least minLength bytes long. A match that becomes too
// short is dropped without changing the emitter.
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
	if maxEnd < m.End {
		m.End = maxEnd
	}
	if m.End-m.Start < minLength {
		return
	}
	e.emit(m)
}
+169
View File
@@ -0,0 +1,169 @@
package matchfinder
import (
"encoding/binary"
)
// M0 is an implementation of the MatchFinder interface based
// on the algorithm used by snappy, but modified to be more like the algorithm
// used by compression level 0 of the brotli reference implementation.
//
// It has a maximum block size of 65536 bytes.
type M0 struct {
// Lazy turns on "lazy matching," for higher compression but less speed.
Lazy bool
// MaxDistance, when non-zero, is the largest match distance FindMatches
// will accept. Zero means no limit.
MaxDistance int
// MaxLength, when non-zero, is the length that longer matches are
// shortened to. Zero means no limit.
MaxLength int
}
// Reset does nothing: M0 keeps no state between calls to FindMatches.
func (M0) Reset() {}
const (
// m0HashLen is the number of input bytes that contribute to a hash.
m0HashLen = 5
// m0TableBits is the number of bits in a hash table index.
m0TableBits = 14
m0TableSize = 1 << m0TableBits
// NOTE(review): m0Shift is not referenced anywhere in this file; confirm
// whether another file in the package still uses it.
m0Shift = 32 - m0TableBits
// m0TableMask is redundant, but helps the compiler eliminate bounds
// checks.
m0TableMask = m0TableSize - 1
)
// hash maps the low m0HashLen bytes of data to a hash table index of
// m0TableBits bits. The bytes above m0HashLen are shifted out before the
// multiplication, so they do not influence the result.
func (m M0) hash(data uint64) uint64 {
	const discardedBits = 64 - 8*m0HashLen
	mixed := (data << discardedBits) * hashMul64
	return mixed >> (64 - m0TableBits)
}
// FindMatches looks for matches in src, appends them to dst, and returns dst.
// src must not be longer than 65536 bytes; longer input panics.
//
// A fresh hash table is used for every call, so matches never refer to data
// from an earlier block. Inputs shorter than minNonLiteralBlockSize are
// returned as a single unmatched run.
func (m M0) FindMatches(dst []Match, src []byte) []Match {
const inputMargin = 16 - 1
const minNonLiteralBlockSize = 1 + 1 + inputMargin
if len(src) < minNonLiteralBlockSize {
dst = append(dst, Match{
Unmatched: len(src),
})
return dst
}
if len(src) > 65536 {
panic("block too long")
}
// table maps a hash to the most recent position in src with that hash.
// Positions are stored as uint16, which is why src is limited to 65536
// bytes. Unused entries read as position 0.
var table [m0TableSize]uint16
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
nextHash := m.hash(binary.LittleEndian.Uint64(src[s:]))
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := 32
nextS := s
candidate := 0
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
// Look up the previous position with this hash and record s in its place.
candidate = int(table[nextHash&m0TableMask])
table[nextHash&m0TableMask] = uint16(s)
nextHash = m.hash(binary.LittleEndian.Uint64(src[nextS:]))
if m.MaxDistance != 0 && s-candidate > m.MaxDistance {
continue
}
// The hash may collide, so confirm that the first 4 bytes really match.
if binary.LittleEndian.Uint32(src[s:]) == binary.LittleEndian.Uint32(src[candidate:]) {
break
}
}
// Invariant: we have a 4-byte match at s.
base := s
s = extendMatch(src, candidate+4, s+4)
origBase := base
if m.Lazy && base+1 < sLimit {
// Lazy matching: also try a match starting one byte later and prefer it
// if it is more than one byte longer than the match found at base.
newBase := base + 1
h := m.hash(binary.LittleEndian.Uint64(src[newBase:]))
newCandidate := int(table[h&m0TableMask])
table[h&m0TableMask] = uint16(newBase)
okDistance := true
if m.MaxDistance != 0 && newBase-newCandidate > m.MaxDistance {
okDistance = false
}
if okDistance && binary.LittleEndian.Uint32(src[newBase:]) == binary.LittleEndian.Uint32(src[newCandidate:]) {
newS := extendMatch(src, newCandidate+4, newBase+4)
if newS-newBase > s-base+1 {
s = newS
base = newBase
candidate = newCandidate
}
}
}
// Shorten the match to MaxLength if a limit is set.
if m.MaxLength != 0 && s-base > m.MaxLength {
s = base + m.MaxLength
}
dst = append(dst, Match{
Unmatched: base - nextEmit,
Length: s - base,
Distance: base - candidate,
})
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if m.Lazy {
// If lazy matching is enabled, we update the hash table for
// every byte in the match.
for i := origBase + 2; i < s-1; i++ {
x := binary.LittleEndian.Uint64(src[i:])
table[m.hash(x)&m0TableMask] = uint16(i)
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := binary.LittleEndian.Uint64(src[s-1:])
prevHash := m.hash(x >> 0)
table[prevHash&m0TableMask] = uint16(s - 1)
// x >> 8 drops the byte at s-1, leaving the bytes that start at s.
nextHash = m.hash(x >> 8)
}
emitRemainder:
// Whatever follows the last match is emitted as a trailing unmatched run.
if nextEmit < len(src) {
dst = append(dst, Match{
Unmatched: len(src) - nextEmit,
})
}
return dst
}
+297
View File
@@ -0,0 +1,297 @@
package matchfinder
import (
"encoding/binary"
"math/bits"
"runtime"
)
// M4 is an implementation of the MatchFinder
// interface that uses a hash table to find matches,
// optional match chains,
// and the advanced parsing technique from
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html.
//
// Zero-valued MaxDistance, MinLength, HashLen and TableBits are replaced by
// their defaults, in place, on the first call to FindMatches.
type M4 struct {
// MaxDistance is the maximum distance (in bytes) to look back for
// a match. The default is 65535.
MaxDistance int
// MinLength is the length of the shortest match to return.
// The default is 4.
MinLength int
// HashLen is the number of bytes to use to calculate the hashes.
// The maximum is 8 and the default is 6.
HashLen int
// TableBits is the number of bits in the hash table indexes.
// The default is 17 (128K entries).
TableBits int
// ChainLength is how many entries to search on the "match chain" of older
// locations with the same hash as the current location.
ChainLength int
// DistanceBitCost is used when comparing two matches to see
// which is better. The comparison is primarily based on the length
// of the matches, but it can also take the distance into account,
// in terms of the number of bits needed to represent the distance.
// One byte of length is given a score of 256, so 32 (256/8) would
// be a reasonable first guess for the value of one bit.
// (The default is 0, which bases the comparison solely on length.)
DistanceBitCost int
// table maps a hash to the most recent index in history with that hash.
// An entry of 0 is treated as "no candidate".
table []uint32
// chain is only used when ChainLength > 0. chain[i] is the distance from
// index i back to the previous index with the same hash; 0 ends the chain.
chain []uint16
// history holds earlier input followed by the block being processed;
// match indexes refer to positions in this buffer.
history []byte
}
// Reset discards all state from the previous stream: the hash table is
// zeroed and the history and chain buffers are emptied. Their backing
// storage is kept for reuse.
func (q *M4) Reset() {
	for slot := 0; slot < len(q.table); slot++ {
		q.table[slot] = 0
	}
	q.chain = q.chain[:0]
	q.history = q.history[:0]
}
// score rates a match so that candidates can be compared. Each byte of
// length is worth 256 points, and every leading zero bit of the 32-bit
// distance (a shorter distance has more of them) adds q.DistanceBitCost.
func (q *M4) score(m absoluteMatch) int {
	length := m.End - m.Start
	distance := uint32(m.Start - m.Match)
	return length*256 + bits.LeadingZeros32(distance)*q.DistanceBitCost
}
// FindMatches looks for matches in src, appends them to dst, and returns dst.
//
// src is appended to q.history first, so matches may refer back to data from
// earlier calls (until Reset). Zero-valued configuration fields are replaced
// by their defaults on q itself.
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if q.MaxDistance == 0 {
q.MaxDistance = 65535
}
if q.MinLength == 0 {
q.MinLength = 4
}
if q.HashLen == 0 {
q.HashLen = 6
}
if q.TableBits == 0 {
q.TableBits = 17
}
if len(q.table) < 1<<q.TableBits {
q.table = make([]uint32, 1<<q.TableBits)
}
e := matchEmitter{Dst: dst}
if len(q.history) > q.MaxDistance*2 {
// Trim down the history buffer.
delta := len(q.history) - q.MaxDistance
copy(q.history, q.history[delta:])
q.history = q.history[:q.MaxDistance]
if q.ChainLength > 0 {
// NOTE(review): chain is truncated here but, unlike history, its
// contents are not shifted down by delta. Chain candidates are
// re-checked against the data before use, so this looks like it can
// only cost compression, not correctness - confirm.
q.chain = q.chain[:q.MaxDistance]
}
// Rebase the stored positions; anything that fell off the front becomes
// 0, which is treated as "no candidate" below.
for i, v := range q.table {
newV := int(v) - delta
if newV < 0 {
newV = 0
}
q.table[i] = uint32(newV)
}
}
// Append src to the history buffer.
e.NextEmit = len(q.history)
q.history = append(q.history, src...)
if q.ChainLength > 0 {
q.chain = append(q.chain, make([]uint16, len(src))...)
}
// From here on src is the whole history, and all indexes are relative to it.
src = q.history
// matches stores the matches that have been found but not emitted,
// in reverse order. (matches[0] is the most recent one.)
var matches [3]absoluteMatch
// The loop stops 7 bytes before the end because each step loads 8 bytes.
for i := e.NextEmit; i < len(src)-7; i++ {
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
e.trim(matches[1], matches[0].Start, q.MinLength)
}
e.emit(matches[0])
matches = [3]absoluteMatch{}
}
// Calculate and store the hash.
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - q.TableBits)
candidate := int(q.table[h])
q.table[h] = uint32(i)
if q.ChainLength > 0 && candidate != 0 {
// Link i to the previous position with the same hash, if the distance
// fits in the chain's uint16 entries.
delta := i - candidate
if delta < 1<<16 {
q.chain[i] = uint16(delta)
}
}
// Inside the most recent match only one position is searched:
// matches[0].End+2-q.HashLen.
if i < matches[0].End && i != matches[0].End+2-q.HashLen {
continue
}
if candidate == 0 || i-candidate > q.MaxDistance {
continue
}
// Look for a match.
var currentMatch absoluteMatch
// Candidates at the same distance as the most recent match are skipped.
if i-candidate != matches[0].Start-matches[0].Match {
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
m := extendMatch2(src, i, candidate, e.NextEmit)
// NOTE(review): this accepts only matches strictly longer than
// MinLength, while the check after the chain walk rejects only
// matches shorter than MinLength - confirm which is intended.
if m.End-m.Start > q.MinLength {
currentMatch = m
}
}
}
// Walk the chain of older positions with the same hash, keeping the
// best-scoring match.
for j := 0; j < q.ChainLength; j++ {
delta := q.chain[candidate]
if delta == 0 {
break
}
candidate -= int(delta)
if candidate <= 0 || i-candidate > q.MaxDistance {
break
}
if i-candidate != matches[0].Start-matches[0].Match {
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
m := extendMatch2(src, i, candidate, e.NextEmit)
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
currentMatch = m
}
}
}
}
if currentMatch.End-currentMatch.Start < q.MinLength {
continue
}
// A new match that overlaps the pending one has to beat it by a margin.
overlapPenalty := 0
if matches[0] != (absoluteMatch{}) {
overlapPenalty = 275
if currentMatch.Start <= matches[1].End {
// This match would completely replace the previous match,
// so there is no penalty for overlap.
overlapPenalty = 0
}
}
if q.score(currentMatch) <= q.score(matches[0])+overlapPenalty {
continue
}
// Push currentMatch onto the front of the pending list.
matches = [3]absoluteMatch{
currentMatch,
matches[0],
matches[1],
}
if matches[2] == (absoluteMatch{}) {
continue
}
// We have three matches, so it's time to emit one and/or eliminate one.
// ("First" in the comments below means the oldest, matches[2], and
// "third" the newest, matches[0].)
switch {
case matches[0].Start < matches[2].End:
// The first and third matches overlap; discard the one in between.
matches = [3]absoluteMatch{
matches[0],
matches[2],
absoluteMatch{},
}
case matches[0].Start < matches[2].End+q.MinLength:
// The first and third matches don't overlap, but there's no room for
// another match between them. Emit the first match and discard the second.
e.emit(matches[2])
matches = [3]absoluteMatch{
matches[0],
absoluteMatch{},
absoluteMatch{},
}
default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
e.trim(matches[2], matches[1].Start, q.MinLength)
matches[2] = absoluteMatch{}
}
}
// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
e.trim(matches[1], matches[0].Start, q.MinLength)
}
if matches[0] != (absoluteMatch{}) {
e.emit(matches[0])
}
dst = e.Dst
// Whatever follows the last match is emitted as a trailing unmatched run.
if e.NextEmit < len(src) {
dst = append(dst, Match{
Unmatched: len(src) - e.NextEmit,
})
}
return dst
}
const hashMul64 = 0x1E35A7BD1E35A7BD
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents. In other words, it
// compares the bytes at i and at j pairwise and returns the value of j at
// the first difference, or len(src) if the end is reached first.
//
// It assumes that:
//
//	0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
	// Word-at-a-time fast paths. The XOR of two little-endian loads has its
	// lowest set bit inside the first differing byte, so the trailing zero
	// count divided by 8 is the number of leading bytes that are equal.
	if arch := runtime.GOARCH; arch == "amd64" {
		// While more than 8 bytes remain after j, compare 8 bytes at a time.
		for ; j+8 < len(src); i, j = i+8, j+8 {
			diff := binary.LittleEndian.Uint64(src[i:]) ^ binary.LittleEndian.Uint64(src[j:])
			if diff != 0 {
				return j + (bits.TrailingZeros64(diff) >> 3)
			}
		}
	} else if arch == "386" {
		// On a 32-bit CPU, we do it 4 bytes at a time.
		for ; j+4 < len(src); i, j = i+4, j+4 {
			diff := binary.LittleEndian.Uint32(src[i:]) ^ binary.LittleEndian.Uint32(src[j:])
			if diff != 0 {
				return j + (bits.TrailingZeros32(diff) >> 3)
			}
		}
	}

	// Finish (or, on other architectures, do everything) byte by byte.
	for j < len(src) && src[i] == src[j] {
		i++
		j++
	}
	return j
}
// Given a 4-byte match at src[start] and src[candidate], extendMatch2 extends it
// upward as far as possible, and downward no farther than to min.
// The result holds the extended start, the end, and the position of the
// earlier copy.
func extendMatch2(src []byte, start, candidate, min int) absoluteMatch {
	// Upward: the first 4 bytes are already known to match.
	result := absoluteMatch{End: extendMatch(src, candidate+4, start+4)}

	// Downward: step both positions back while the preceding bytes agree.
	for {
		if start <= min || candidate <= 0 || src[start-1] != src[candidate-1] {
			break
		}
		start--
		candidate--
	}

	result.Start = start
	result.Match = candidate
	return result
}
+103
View File
@@ -0,0 +1,103 @@
// The matchfinder package defines reusable components for data compression.
//
// Many compression libraries have two main parts:
// - Something that looks for repeated sequences of bytes
// - An encoder for the compressed data format (often an entropy coder)
//
// Although these are logically two separate steps, the implementations are
// usually closely tied together. You can't use flate's matcher with snappy's
// encoder, for example. This package defines interfaces and an intermediate
// representation to allow mixing and matching compression components.
package matchfinder
import "io"
// A Match is the basic unit of LZ77 compression: a run of literal
// (unmatched) bytes followed by a copy of earlier data.
type Match struct {
Unmatched int // the number of unmatched bytes since the previous match
Length int // the number of bytes in the matched string; it may be 0 at the end of the input
Distance int // how far back in the stream to copy from
}
// A MatchFinder performs the LZ77 stage of compression, looking for matches.
type MatchFinder interface {
// FindMatches looks for matches in src, appends them to dst, and returns dst.
// Callers may pass a zero-length dst to reuse its backing storage.
FindMatches(dst []Match, src []byte) []Match
// Reset clears any internal state, preparing the MatchFinder to be used with
// a new stream.
Reset()
}
// An Encoder encodes the data in its final format.
type Encoder interface {
// Encode appends the encoded format of src to dst, using the match
// information from matches, and returns the extended slice.
// lastBlock is true for the final block of a stream.
Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
// Reset clears any internal state, preparing the Encoder to be used with
// a new stream.
Reset()
}
// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
type Writer struct {
Dest io.Writer
MatchFinder MatchFinder
Encoder Encoder
// BlockSize is the number of bytes to compress at a time. If it is zero,
// each Write operation will be treated as one block.
BlockSize int
// err is the result of the most recent write to Dest; once it is non-nil,
// Write refuses further input.
err error
// inBuf holds input that has not yet filled a complete block
// (only used when BlockSize is non-zero).
inBuf []byte
// outBuf and matches are scratch buffers reused from block to block.
outBuf []byte
matches []Match
}
// Write compresses p and writes the result to w.Dest.
//
// With a zero BlockSize, p is compressed immediately as one block.
// Otherwise p is buffered, every complete block of BlockSize bytes is
// compressed, and the remainder is kept for the next Write or Close.
// After a write to Dest has failed, Write accepts no more data and returns
// that error.
func (w *Writer) Write(p []byte) (n int, err error) {
	if w.err != nil {
		return 0, w.err
	}
	if w.BlockSize == 0 {
		return w.writeBlock(p, false)
	}

	w.inBuf = append(w.inBuf, p...)

	// Compress as many whole blocks as are available, stopping on error.
	consumed := 0
	for w.err == nil && consumed+w.BlockSize <= len(w.inBuf) {
		block := w.inBuf[consumed : consumed+w.BlockSize]
		w.writeBlock(block, false)
		consumed += w.BlockSize
	}

	// Move the unconsumed tail to the front of the buffer.
	if consumed > 0 {
		kept := copy(w.inBuf, w.inBuf[consumed:])
		w.inBuf = w.inBuf[:kept]
	}
	return len(p), w.err
}
// writeBlock compresses p as a single block and writes it to w.Dest.
// lastBlock is passed on to the Encoder. The result of the write is stored
// in w.err and returned along with len(p).
func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
	w.outBuf = w.outBuf[:0]

	found := w.MatchFinder.FindMatches(w.matches[:0], p)
	w.matches = found

	encoded := w.Encoder.Encode(w.outBuf, p, found, lastBlock)
	w.outBuf = encoded

	_, writeErr := w.Dest.Write(encoded)
	w.err = writeErr
	return len(p), writeErr
}
// Close compresses any buffered input as the final block of the stream and
// writes it to w.Dest. It returns the error from that write.
//
// If an earlier write to Dest has already failed, Close writes nothing more
// and returns that earlier error. The stream is already broken at that point,
// and writing another block would replace the recorded error, possibly with
// nil. The buffered input is discarded either way. Close does not close
// w.Dest.
func (w *Writer) Close() error {
	if w.err != nil {
		// Sticky error: keep reporting the first failure.
		w.inBuf = w.inBuf[:0]
		return w.err
	}
	w.writeBlock(w.inBuf, true)
	w.inBuf = w.inBuf[:0]
	return w.err
}
// Reset prepares w to compress a new stream to newDest. The MatchFinder and
// Encoder are reset, any recorded error is cleared, and the internal buffers
// are emptied while keeping their storage.
func (w *Writer) Reset(newDest io.Writer) {
	w.MatchFinder.Reset()
	w.Encoder.Reset()

	w.Dest = newDest
	w.err = nil

	w.matches = w.matches[:0]
	w.outBuf = w.outBuf[:0]
	w.inBuf = w.inBuf[:0]
}
+53
View File
@@ -0,0 +1,53 @@
package matchfinder
import "fmt"
// A TextEncoder is an Encoder that produces a human-readable representation of
// the LZ77 compression. Matches are replaced with <Length,Distance> symbols.
type TextEncoder struct{}
// Reset does nothing: a TextEncoder has no state.
func (t TextEncoder) Reset() {}
// Encode appends a readable rendering of src to dst and returns it.
// Unmatched bytes are copied through unchanged and every match with a
// non-zero length is written as "<Length,Distance>". Bytes of src that are
// not covered by matches are appended at the end. lastBlock is ignored.
func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte {
	cursor := 0
	for idx := range matches {
		m := matches[idx]
		if n := m.Unmatched; n > 0 {
			dst = append(dst, src[cursor:cursor+n]...)
			cursor += n
		}
		if m.Length <= 0 {
			continue
		}
		dst = append(dst, fmt.Sprintf("<%d,%d>", m.Length, m.Distance)...)
		cursor += m.Length
	}
	if cursor < len(src) {
		dst = append(dst, src[cursor:]...)
	}
	return dst
}
// A NoMatchFinder implements MatchFinder, but doesn't find any matches.
// It can be used to implement the equivalent of the standard library flate package's
// HuffmanOnly setting.
type NoMatchFinder struct{}
// Reset does nothing: a NoMatchFinder has no state.
func (n NoMatchFinder) Reset() {}
// FindMatches appends a single Match to dst that marks all of src as
// unmatched, and returns dst.
func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match {
	allLiteral := Match{Unmatched: len(src)}
	return append(dst, allLiteral)
}
// AutoReset wraps a MatchFinder that can return references to data in previous
// blocks, and calls Reset before each block. It is useful for (e.g.) using a
// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't
// support references between blocks.)
type AutoReset struct {
// MatchFinder is the wrapped finder; its Reset method is promoted unchanged.
MatchFinder
}
// FindMatches resets the wrapped MatchFinder and then lets it search src, so
// that no match can refer to data from an earlier block.
func (a AutoReset) FindMatches(dst []Match, src []byte) []Match {
	inner := a.MatchFinder
	inner.Reset()
	return inner.FindMatches(dst, src)
}
+66
View File
@@ -0,0 +1,66 @@
package brotli
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/*
Dynamically grows array capacity to at least the requested size
T: data type
A: array
C: capacity
R: requested size
*/
/* brotli_ensure_capacity_uint8_t grows the byte array *a so that its tracked
   capacity *c is at least r.

   Nothing happens when *c >= r. Otherwise the new capacity is *c doubled
   until it reaches r (or r itself when *c is 0). If the existing backing
   storage is large enough the slice is simply re-sliced to the new capacity;
   if not, a new array is allocated and the first *c bytes are copied over.
   On return len(*a) and *c both equal the new capacity. */
func brotli_ensure_capacity_uint8_t(a *[]byte, c *uint, r uint) {
	if *c >= r {
		return
	}

	grown := *c
	if grown == 0 {
		grown = r
	}
	for grown < r {
		grown *= 2
	}

	if int(grown) <= cap(*a) {
		/* Enough room already: just expose it. */
		*a = (*a)[:grown]
	} else {
		replacement := make([]byte, grown)
		if *c != 0 {
			copy(replacement, (*a)[:*c])
		}
		*a = replacement
	}
	*c = grown
}
/* brotli_ensure_capacity_uint32_t grows the uint32 array *a so that its
   tracked capacity *c is at least r.

   Nothing happens when *c >= r. Otherwise the new capacity is *c doubled
   until it reaches r (or r itself when *c is 0). If the existing backing
   storage is large enough the slice is simply re-sliced to the new capacity;
   if not, a new array is allocated and the first *c elements are copied
   over. On return len(*a) and *c both equal the new capacity. */
func brotli_ensure_capacity_uint32_t(a *[]uint32, c *uint, r uint) {
	if *c >= r {
		return
	}

	grown := *c
	if grown == 0 {
		grown = r
	}
	for grown < r {
		grown *= 2
	}

	if int(grown) <= cap(*a) {
		/* Enough room already: just expose it. */
		*a = (*a)[:grown]
	} else {
		replacement := make([]uint32, grown)
		if *c != 0 {
			copy(replacement, (*a)[:*c])
		}
		*a = replacement
	}
	*c = grown
}
+574
View File
@@ -0,0 +1,574 @@
package brotli
import (
"sync"
)
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
/* metaBlockSplit groups, for one meta-block, the block splits of the three
symbol categories together with the context maps and histograms that the
builders in this file (buildMetaBlock, buildMetaBlockGreedy) fill in.
Each *_size field is the number of entries in use in the slice next to it. */
type metaBlockSplit struct {
/* Block splits for the literal, command and distance categories. */
literal_split blockSplit
command_split blockSplit
distance_split blockSplit
/* Literal context map; the builders size it as
literal_split.num_types << literalContextBits. */
literal_context_map []uint32
literal_context_map_size uint
/* Distance context map; buildMetaBlock sizes it as
distance_split.num_types << distanceContextBits. */
distance_context_map []uint32
distance_context_map_size uint
/* Histograms per category and their counts. */
literal_histograms []histogramLiteral
literal_histograms_size uint
command_histograms []histogramCommand
command_histograms_size uint
distance_histograms []histogramDistance
distance_histograms_size uint
}
/* metaBlockPool holds *metaBlockSplit values handed back through
freeMetaBlockSplit so that getMetaBlockSplit can reuse them. */
var metaBlockPool sync.Pool
/* getMetaBlockSplit returns a metaBlockSplit for a new meta-block. It takes
   one from metaBlockPool when the pool has one and allocates a zero value
   otherwise.

   A recycled value keeps its backing arrays but is reset before it is
   returned: the three block splits go through initBlockSplit, every slice
   is truncated to length zero and every size counter is cleared. The
   histogram size counters are cleared together with their slices so that a
   recycled value never advertises more histograms than its zero-length
   slices hold, matching what a freshly allocated value reports. */
func getMetaBlockSplit() *metaBlockSplit {
	mb, _ := metaBlockPool.Get().(*metaBlockSplit)
	if mb == nil {
		return &metaBlockSplit{}
	}

	initBlockSplit(&mb.literal_split)
	initBlockSplit(&mb.command_split)
	initBlockSplit(&mb.distance_split)

	mb.literal_context_map = mb.literal_context_map[:0]
	mb.literal_context_map_size = 0
	mb.distance_context_map = mb.distance_context_map[:0]
	mb.distance_context_map_size = 0

	mb.literal_histograms = mb.literal_histograms[:0]
	mb.literal_histograms_size = 0
	mb.command_histograms = mb.command_histograms[:0]
	mb.command_histograms_size = 0
	mb.distance_histograms = mb.distance_histograms[:0]
	mb.distance_histograms_size = 0

	return mb
}
/* freeMetaBlockSplit puts mb into metaBlockPool, from which
getMetaBlockSplit may later hand it out again. */
func freeMetaBlockSplit(mb *metaBlockSplit) {
metaBlockPool.Put(mb)
}
/* initDistanceParams records the distance-coding configuration chosen by
   npostfix and ndirect in params.dist, along with the alphabet size and the
   maximum distance derived from them. With params.large_window set, both
   derived values are recomputed with largeMaxDistanceBits. */
func initDistanceParams(params *encoderParams, npostfix uint32, ndirect uint32) {
	dist := &params.dist
	dist.distance_postfix_bits = npostfix
	dist.num_direct_distance_codes = ndirect

	alphabetSize := uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), maxDistanceBits))
	maxDistance := ndirect + (1 << (maxDistanceBits + npostfix + 2)) - (1 << (npostfix + 2))

	if params.large_window {
		bound := [maxNpostfix + 1]uint32{0, 4, 12, 28}
		postfix := uint32(1) << npostfix
		alphabetSize = uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), largeMaxDistanceBits))

		/* The maximum distance is set so that no distance symbol used can
		   encode a distance larger than BROTLI_MAX_ALLOWED_DISTANCE with all
		   its extra bits set. */
		limit := bound[npostfix]
		switch {
		case ndirect < limit:
			maxDistance = maxAllowedDistance - (limit - ndirect)
		case ndirect >= limit+postfix:
			maxDistance = (3 << 29) - 4 + (ndirect - limit)
		default:
			maxDistance = maxAllowedDistance
		}
	}

	dist.alphabet_size = alphabetSize
	dist.max_distance = uint(maxDistance)
}
/* recomputeDistancePrefixes re-encodes the distance prefix and extra bits of
   every command in cmds that has a non-zero copy length and a cmd_prefix_ of
   at least 128. The distance code is restored under orig_params and encoded
   again under new_params. Nothing is done when both parameter sets have the
   same postfix bits and the same number of direct distance codes. */
func recomputeDistancePrefixes(cmds []command, orig_params *distanceParams, new_params *distanceParams) {
	samePostfix := orig_params.distance_postfix_bits == new_params.distance_postfix_bits
	if samePostfix && orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes {
		return
	}

	for i := 0; i < len(cmds); i++ {
		c := &cmds[i]
		if commandCopyLen(c) == 0 || c.cmd_prefix_ < 128 {
			continue
		}
		code := commandRestoreDistanceCode(c, orig_params)
		prefixEncodeCopyDistance(uint(code), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &c.dist_prefix_, &c.dist_extra_)
	}
}
/* computeDistanceCost estimates the cost of coding the distances of cmds
   under new_params and stores it in *cost.

   Only commands with a non-zero copy length and a cmd_prefix_ of at least
   128 are counted. When orig_params and new_params agree, the existing
   dist_prefix_ is used. Otherwise the distance code is restored under
   orig_params and re-encoded under new_params. The cost is the population
   cost of the resulting prefix histogram plus the sum of the extra-bit
   counts held in the upper bits (prefix >> 10) of each prefix.

   Returns false, leaving *cost untouched, as soon as a restored distance
   code exceeds new_params.max_distance; returns true otherwise. */
func computeDistanceCost(cmds []command, orig_params *distanceParams, new_params *distanceParams, cost *float64) bool {
	var (
		prefix    uint16
		extra     uint32
		extraBits float64
		histo     histogramDistance
	)
	histogramClearDistance(&histo)

	sameParams := orig_params.distance_postfix_bits == new_params.distance_postfix_bits &&
		orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes

	for i := 0; i < len(cmds); i++ {
		c := &cmds[i]
		if commandCopyLen(c) == 0 || c.cmd_prefix_ < 128 {
			continue
		}
		if sameParams {
			prefix = c.dist_prefix_
		} else {
			var code uint32 = commandRestoreDistanceCode(c, orig_params)
			if code > uint32(new_params.max_distance) {
				return false
			}
			prefixEncodeCopyDistance(uint(code), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &prefix, &extra)
		}
		histogramAddDistance(&histo, uint(prefix)&0x3FF)
		extraBits += float64(prefix >> 10)
	}

	*cost = populationCostDistance(&histo) + extraBits
	return true
}
/* Maximum number of histograms passed to the clusterHistograms* calls in
buildMetaBlock. Histogram ids need to fit in one byte. */
var buildMetaBlock_kMaxNumberOfHistograms uint = 256
/* buildMetaBlock fills mb for the commands of one meta-block:
  1. searches (npostfix, ndirect) distance parameters by computeDistanceCost
     and stores the selected ones in params.dist;
  2. re-encodes the distance prefixes of cmds for the selected parameters;
  3. splits literals, commands and distances into blocks (splitBlock);
  4. builds per-context histograms and clusters the literal and distance
     histograms into mb together with their context maps.
params.dist and the distance prefixes stored in cmds may be modified. */
func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParams, prev_byte byte, prev_byte2 byte, cmds []command, literal_context_mode int, mb *metaBlockSplit) {
var distance_histograms []histogramDistance
var literal_histograms []histogramLiteral
var literal_context_modes []int = nil
var literal_histograms_size uint
var distance_histograms_size uint
var i uint
var literal_context_multiplier uint = 1
var npostfix uint32
var ndirect_msb uint32 = 0
var check_orig bool = true
var best_dist_cost float64 = 1e99
var orig_params encoderParams = *params
var new_params encoderParams = *params
/* Parameter search. A candidate is accepted while its cost does not exceed
the best cost so far; the inner loop stops at the first candidate that is
either rejected by computeDistanceCost or more expensive. */
for npostfix = 0; npostfix <= maxNpostfix; npostfix++ {
for ; ndirect_msb < 16; ndirect_msb++ {
var ndirect uint32 = ndirect_msb << npostfix
var skip bool
var dist_cost float64
initDistanceParams(&new_params, npostfix, ndirect)
if npostfix == orig_params.dist.distance_postfix_bits && ndirect == orig_params.dist.num_direct_distance_codes {
/* The caller's original parameters are covered by the search. */
check_orig = false
}
skip = !computeDistanceCost(cmds, &orig_params.dist, &new_params.dist, &dist_cost)
if skip || (dist_cost > best_dist_cost) {
break
}
best_dist_cost = dist_cost
params.dist = new_params.dist
}
/* ndirect_msb is deliberately not reset: the next npostfix resumes from
half of the last value tried before the stop (or from 0). */
if ndirect_msb > 0 {
ndirect_msb--
}
ndirect_msb /= 2
}
/* If the search never visited the original parameters, evaluate them too
and keep them when they are cheaper than the best candidate. */
if check_orig {
var dist_cost float64
computeDistanceCost(cmds, &orig_params.dist, &orig_params.dist, &dist_cost)
if dist_cost < best_dist_cost {
/* NB: currently unused; uncomment when more param tuning is added. */
/* best_dist_cost = dist_cost; */
params.dist = orig_params.dist
}
}
/* Bring the distance prefixes stored in cmds in line with params.dist. */
recomputeDistancePrefixes(cmds, &orig_params.dist, &params.dist)
splitBlock(cmds, ringbuffer, pos, mask, params, &mb.literal_split, &mb.command_split, &mb.distance_split)
/* With literal context modeling enabled, every literal block type gets
1 << literalContextBits histograms and the same context mode; otherwise
literal_context_modes stays nil and there is one histogram per type. */
if !params.disable_literal_context_modeling {
literal_context_multiplier = 1 << literalContextBits
literal_context_modes = make([]int, (mb.literal_split.num_types))
for i = 0; i < mb.literal_split.num_types; i++ {
literal_context_modes[i] = literal_context_mode
}
}
/* Scratch histograms that are clustered into mb further below. */
literal_histograms_size = mb.literal_split.num_types * literal_context_multiplier
literal_histograms = make([]histogramLiteral, literal_histograms_size)
clearHistogramsLiteral(literal_histograms, literal_histograms_size)
distance_histograms_size = mb.distance_split.num_types << distanceContextBits
distance_histograms = make([]histogramDistance, distance_histograms_size)
clearHistogramsDistance(distance_histograms, distance_histograms_size)
/* Command histograms are written directly into mb (one per block type),
reusing the existing backing array when it is large enough. */
mb.command_histograms_size = mb.command_split.num_types
if cap(mb.command_histograms) < int(mb.command_histograms_size) {
mb.command_histograms = make([]histogramCommand, (mb.command_histograms_size))
} else {
mb.command_histograms = mb.command_histograms[:mb.command_histograms_size]
}
clearHistogramsCommand(mb.command_histograms, mb.command_histograms_size)
buildHistogramsWithContext(cmds, &mb.literal_split, &mb.command_split, &mb.distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb.command_histograms, distance_histograms)
literal_context_modes = nil
/* The literal context map always has num_types << literalContextBits
entries, even when context modeling is disabled (see the expansion below). */
mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits
if cap(mb.literal_context_map) < int(mb.literal_context_map_size) {
mb.literal_context_map = make([]uint32, (mb.literal_context_map_size))
} else {
mb.literal_context_map = mb.literal_context_map[:mb.literal_context_map_size]
}
mb.literal_histograms_size = mb.literal_context_map_size
if cap(mb.literal_histograms) < int(mb.literal_histograms_size) {
mb.literal_histograms = make([]histogramLiteral, (mb.literal_histograms_size))
} else {
mb.literal_histograms = mb.literal_histograms[:mb.literal_histograms_size]
}
clusterHistogramsLiteral(literal_histograms, literal_histograms_size, buildMetaBlock_kMaxNumberOfHistograms, mb.literal_histograms, &mb.literal_histograms_size, mb.literal_context_map)
literal_histograms = nil
if params.disable_literal_context_modeling {
/* Distribute assignment to all contexts. Block types are walked from the
last one down so that entry i is read before any write can reach it:
writes for type i start at index i << literalContextBits. */
for i = mb.literal_split.num_types; i != 0; {
var j uint = 0
i--
for ; j < 1<<literalContextBits; j++ {
mb.literal_context_map[(i<<literalContextBits)+j] = mb.literal_context_map[i]
}
}
}
/* Same sizing and clustering for the distance category. */
mb.distance_context_map_size = mb.distance_split.num_types << distanceContextBits
if cap(mb.distance_context_map) < int(mb.distance_context_map_size) {
mb.distance_context_map = make([]uint32, (mb.distance_context_map_size))
} else {
mb.distance_context_map = mb.distance_context_map[:mb.distance_context_map_size]
}
mb.distance_histograms_size = mb.distance_context_map_size
if cap(mb.distance_histograms) < int(mb.distance_histograms_size) {
mb.distance_histograms = make([]histogramDistance, (mb.distance_histograms_size))
} else {
mb.distance_histograms = mb.distance_histograms[:mb.distance_histograms_size]
}
clusterHistogramsDistance(distance_histograms, mb.distance_context_map_size, buildMetaBlock_kMaxNumberOfHistograms, mb.distance_histograms, &mb.distance_histograms_size, mb.distance_context_map)
distance_histograms = nil
}
/* Largest num_contexts accepted by initContextBlockSplitter (asserted
there); it also sizes the per-context entropy arrays of the splitter. */
const maxStaticContexts = 13
/* Greedy block splitter for one block category (literal, command or distance).
Gathers histograms for all context buckets. */
type contextBlockSplitter struct {
/* Symbol count passed to bitsEntropy. */
alphabet_size_ uint
/* Number of context buckets; each block type owns this many consecutive
histograms. */
num_contexts_ uint
/* Upper limit on block types: maxNumberOfBlockTypes / num_contexts. */
max_block_types_ uint
/* Lower bound applied to block_size_ when a block is finished; also the
initial target size and the step by which the target grows. */
min_block_size_ uint
/* Both entropy differences must exceed this to open a new block type. */
split_threshold_ float64
/* Number of blocks emitted so far. */
num_blocks_ uint
/* Block split being filled in (types, lengths, num_types, num_blocks). */
split_ *blockSplit
/* Caller-owned histogram storage and a pointer to its in-use count; the
count is set to num_types * num_contexts when the final block is finished. */
histograms_ []histogramLiteral
histograms_size_ *uint
/* Block size at which AddSymbol triggers FinishBlock. */
target_block_size_ uint
/* Symbols added since the last FinishBlock. */
block_size_ uint
/* Index of the first histogram of the block currently being collected. */
curr_histogram_ix_ uint
/* First-histogram indices of the last ([0]) and second last ([1]) block
types. */
last_histogram_ix_ [2]uint
/* Per-context entropies: entries [0, num_contexts) belong to the last
block type, entries [num_contexts, 2*num_contexts) to the second last. */
last_entropy_ [2 * maxStaticContexts]float64
/* Consecutive merges into the last block; once above 1 the target block
size starts growing. */
merge_last_count_ uint
}
/* initContextBlockSplitter prepares self to split a stream of num_symbols
   symbols spread over num_contexts context buckets. It records the tuning
   parameters, sizes split.types and split.lengths for the largest possible
   number of blocks, sizes *histograms (reusing its backing array when it is
   big enough) and clears the histograms of the first block type only. */
func initContextBlockSplitter(self *contextBlockSplitter, alphabet_size uint, num_contexts uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) {
	maxBlocks := num_symbols/min_block_size + 1
	assert(num_contexts <= maxStaticContexts)

	self.alphabet_size_ = alphabet_size
	self.num_contexts_ = num_contexts
	self.max_block_types_ = maxNumberOfBlockTypes / num_contexts
	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.split_ = split
	self.histograms_size_ = histograms_size
	self.target_block_size_ = min_block_size
	self.num_blocks_, self.block_size_ = 0, 0
	self.curr_histogram_ix_, self.merge_last_count_ = 0, 0

	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	maxTypes := brotli_min_size_t(maxBlocks, self.max_block_types_+1)

	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, maxBlocks)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, maxBlocks)
	split.num_blocks = maxBlocks

	*histograms_size = maxTypes * num_contexts
	if histograms != nil && cap(*histograms) >= int(*histograms_size) {
		*histograms = (*histograms)[:*histograms_size]
	} else {
		*histograms = make([]histogramLiteral, *histograms_size)
	}
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	clearHistogramsLiteral(self.histograms_, num_contexts)
	self.last_histogram_ix_ = [2]uint{0, 0}
}
/* Finishes the block collected so far. The very first block is always
emitted as block type 0. For later blocks it does one of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block.
The choice is based on entropy differences summed over all contexts.
When is_final is set, the in-use histogram count and split.num_blocks are
written back at the end. */
func contextBlockSplitterFinishBlock(self *contextBlockSplitter, is_final bool) {
var split *blockSplit = self.split_
var num_contexts uint = self.num_contexts_
var last_entropy []float64 = self.last_entropy_[:]
var histograms []histogramLiteral = self.histograms_
/* A finished block is never recorded as shorter than min_block_size_. */
if self.block_size_ < self.min_block_size_ {
self.block_size_ = self.min_block_size_
}
if self.num_blocks_ == 0 {
var i uint
/* Create first block. */
split.lengths[0] = uint32(self.block_size_)
split.types[0] = 0
/* With a single block type, "last" and "second last" entropies coincide. */
for i = 0; i < num_contexts; i++ {
last_entropy[i] = bitsEntropy(histograms[i].data_[:], self.alphabet_size_)
last_entropy[num_contexts+i] = last_entropy[i]
}
self.num_blocks_++
split.num_types++
/* Move on to the next group of num_contexts histograms and clear it if it
lies inside the allocated range. */
self.curr_histogram_ix_ += num_contexts
if self.curr_histogram_ix_ < *self.histograms_size_ {
clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
}
self.block_size_ = 0
} else if self.block_size_ > 0 {
var entropy [maxStaticContexts]float64
/* combined_histo[j*num_contexts+i] is the current histogram of context i
merged with that of the last (j == 0) or second last (j == 1) block type. */
var combined_histo []histogramLiteral = make([]histogramLiteral, (2 * num_contexts))
var combined_entropy [2 * maxStaticContexts]float64
var diff = [2]float64{0.0}
/* Try merging the set of histograms for the current block type with the
respective set of histograms for the last and second last block types.
Decide over the split based on the total reduction of entropy across
all contexts. */
var i uint
for i = 0; i < num_contexts; i++ {
var curr_histo_ix uint = self.curr_histogram_ix_ + i
var j uint
entropy[i] = bitsEntropy(histograms[curr_histo_ix].data_[:], self.alphabet_size_)
for j = 0; j < 2; j++ {
var jx uint = j*num_contexts + i
var last_histogram_ix uint = self.last_histogram_ix_[j] + i
combined_histo[jx] = histograms[curr_histo_ix]
histogramAddHistogramLiteral(&combined_histo[jx], &histograms[last_histogram_ix])
combined_entropy[jx] = bitsEntropy(combined_histo[jx].data_[0:], self.alphabet_size_)
/* Extra cost of coding the merged data with one histogram instead of two. */
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx]
}
}
if split.num_types < self.max_block_types_ && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
/* Create new block. */
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = byte(split.num_types)
self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = split.num_types * num_contexts
for i = 0; i < num_contexts; i++ {
last_entropy[num_contexts+i] = last_entropy[i]
last_entropy[i] = entropy[i]
}
self.num_blocks_++
split.num_types++
self.curr_histogram_ix_ += num_contexts
if self.curr_histogram_ix_ < *self.histograms_size_ {
clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
}
self.block_size_ = 0
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
} else if diff[1] < diff[0]-20.0 {
/* Merging with the second last type is cheaper by more than 20 than
merging with the last one. */
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
/* Combine this block with second last block. */
/* The second last type becomes the last one, and vice versa. */
var tmp uint = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
self.last_histogram_ix_[1] = tmp
for i = 0; i < num_contexts; i++ {
histograms[self.last_histogram_ix_[0]+i] = combined_histo[num_contexts+i]
last_entropy[num_contexts+i] = last_entropy[i]
last_entropy[i] = combined_entropy[num_contexts+i]
/* The current histograms are reused for the next block. */
histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
}
self.num_blocks_++
self.block_size_ = 0
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
} else {
/* Combine this block with last block. */
split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
for i = 0; i < num_contexts; i++ {
histograms[self.last_histogram_ix_[0]+i] = combined_histo[i]
last_entropy[i] = combined_entropy[i]
if split.num_types == 1 {
last_entropy[num_contexts+i] = last_entropy[i]
}
histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
}
self.block_size_ = 0
/* From the second consecutive merge on, the target block size grows by
min_block_size_ per merge. */
self.merge_last_count_++
if self.merge_last_count_ > 1 {
self.target_block_size_ += self.min_block_size_
}
}
combined_histo = nil
}
if is_final {
*self.histograms_size_ = split.num_types * num_contexts
split.num_blocks = self.num_blocks_
}
}
/* Adds symbol to the histogram of the given context within the current
   block type. Once the block has grown to the target size, the block is
   finished (non-final), which decides whether to split or merge it. */
func contextBlockSplitterAddSymbol(self *contextBlockSplitter, symbol uint, context uint) {
	slot := self.curr_histogram_ix_ + context
	histogramAddLiteral(&self.histograms_[slot], symbol)

	self.block_size_++
	if self.block_size_ != self.target_block_size_ {
		return
	}
	contextBlockSplitterFinishBlock(self, false /* is_final */)
}
/* mapStaticContexts builds mb.literal_context_map from a static context
   map. The map is sized to literal_split.num_types << literalContextBits,
   reusing the backing array when it is large enough. For block type i,
   entry j is set to i*num_contexts + static_context_map[j]. */
func mapStaticContexts(num_contexts uint, static_context_map []uint32, mb *metaBlockSplit) {
	mapSize := mb.literal_split.num_types << literalContextBits
	mb.literal_context_map_size = mapSize
	if int(mapSize) <= cap(mb.literal_context_map) {
		mb.literal_context_map = mb.literal_context_map[:mapSize]
	} else {
		mb.literal_context_map = make([]uint32, mapSize)
	}

	for blockType := uint(0); blockType < mb.literal_split.num_types; blockType++ {
		base := uint32(blockType * num_contexts)
		for ctx := uint(0); ctx < 1<<literalContextBits; ctx++ {
			mb.literal_context_map[(blockType<<literalContextBits)+ctx] = base + static_context_map[ctx]
		}
	}
}
/* buildMetaBlockGreedyInternal fills mb in a single pass over commands using
   the greedy block splitters. Each command contributes its cmd_prefix_ to
   the command splitter, its inserted literals (read from ringbuffer
   starting at pos) to the literal splitter, and, for commands with a copy
   and a cmd_prefix_ of at least 128, the low 10 bits of its dist_prefix_ to
   the distance splitter.

   With num_contexts == 1 the plain literal splitter is used. Otherwise the
   context splitter is used, with the context of each literal looked up
   through static_context_map, and the literal context map is built from
   static_context_map at the end. */
func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
	var (
		litPlain   blockSplitterLiteral
		litCtx     contextBlockSplitter
		cmdBlocks  blockSplitterCommand
		distBlocks blockSplitterDistance
	)
	singleContext := num_contexts == 1

	var num_literals uint
	for i := range commands {
		num_literals += uint(commands[i].insert_len_)
	}

	if singleContext {
		initBlockSplitterLiteral(&litPlain, 256, 512, 400.0, num_literals, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
	} else {
		initContextBlockSplitter(&litCtx, 256, num_contexts, 512, 400.0, num_literals, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
	}
	initBlockSplitterCommand(&cmdBlocks, numCommandSymbols, 1024, 500.0, uint(len(commands)), &mb.command_split, &mb.command_histograms, &mb.command_histograms_size)
	initBlockSplitterDistance(&distBlocks, 64, 512, 100.0, uint(len(commands)), &mb.distance_split, &mb.distance_histograms, &mb.distance_histograms_size)

	for idx := range commands {
		cmd := commands[idx]
		blockSplitterAddSymbolCommand(&cmdBlocks, uint(cmd.cmd_prefix_))

		for remaining := uint(cmd.insert_len_); remaining != 0; remaining-- {
			literal := ringbuffer[pos&mask]
			if singleContext {
				blockSplitterAddSymbolLiteral(&litPlain, uint(literal))
			} else {
				context := uint(getContext(prev_byte, prev_byte2, literal_context_lut))
				contextBlockSplitterAddSymbol(&litCtx, uint(literal), uint(static_context_map[context]))
			}
			prev_byte2, prev_byte = prev_byte, literal
			pos++
		}

		copyLen := commandCopyLen(&cmd)
		pos += uint(copyLen)
		if copyLen == 0 {
			continue
		}
		/* After a copy, the context bytes are the last two bytes copied. */
		prev_byte2 = ringbuffer[(pos-2)&mask]
		prev_byte = ringbuffer[(pos-1)&mask]
		if cmd.cmd_prefix_ >= 128 {
			blockSplitterAddSymbolDistance(&distBlocks, uint(cmd.dist_prefix_)&0x3FF)
		}
	}

	if singleContext {
		blockSplitterFinishBlockLiteral(&litPlain, true /* is_final */)
	} else {
		contextBlockSplitterFinishBlock(&litCtx, true /* is_final */)
	}
	blockSplitterFinishBlockCommand(&cmdBlocks, true /* is_final */)
	blockSplitterFinishBlockDistance(&distBlocks, true /* is_final */)

	if num_contexts > 1 {
		mapStaticContexts(num_contexts, static_context_map, mb)
	}
}
/* buildMetaBlockGreedy is the entry point of the greedy meta-block builder.
   With a single context it calls the internal builder with num_contexts = 1
   and no static context map; otherwise it forwards num_contexts and
   static_context_map unchanged. */
func buildMetaBlockGreedy(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
	contexts, contextMap := num_contexts, static_context_map
	if num_contexts == 1 {
		contexts, contextMap = 1, nil
	}
	buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, contexts, contextMap, commands, mb)
}
/* optimizeHistograms runs optimizeHuffmanCountsForRLE over every in-use
   literal, command and distance histogram of mb, with alphabet sizes 256,
   numCommandSymbols and num_distance_codes respectively. One scratch buffer
   of numCommandSymbols bytes is shared by all calls. */
func optimizeHistograms(num_distance_codes uint32, mb *metaBlockSplit) {
	var scratch [numCommandSymbols]byte
	goodForRLE := scratch[:]

	for i := uint(0); i < mb.literal_histograms_size; i++ {
		optimizeHuffmanCountsForRLE(256, mb.literal_histograms[i].data_[:], goodForRLE)
	}
	for i := uint(0); i < mb.command_histograms_size; i++ {
		optimizeHuffmanCountsForRLE(numCommandSymbols, mb.command_histograms[i].data_[:], goodForRLE)
	}
	for i := uint(0); i < mb.distance_histograms_size; i++ {
		optimizeHuffmanCountsForRLE(uint(num_distance_codes), mb.distance_histograms[i].data_[:], goodForRLE)
	}
}
+165
View File
@@ -0,0 +1,165 @@
package brotli
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Greedy block splitter for the command category: symbols are added one at
a time, and each time the current block reaches the target size it is either
given a new block type, given the type of the second last block, or merged
into the last block. */
type blockSplitterCommand struct {
/* Symbol count passed to bitsEntropy. */
alphabet_size_ uint
/* Lower bound applied to block_size_ when a block is finished; also the
initial target size and the step by which the target grows. */
min_block_size_ uint
/* Both entropy differences must exceed this to open a new block type. */
split_threshold_ float64
/* Number of blocks emitted so far. */
num_blocks_ uint
/* Block split being filled in (types, lengths, num_types, num_blocks). */
split_ *blockSplit
/* Caller-owned histogram storage and a pointer to its in-use count; the
count is set to split.num_types when the final block is finished. */
histograms_ []histogramCommand
histograms_size_ *uint
/* Block size at which AddSymbol triggers FinishBlock. */
target_block_size_ uint
/* Symbols added since the last FinishBlock. */
block_size_ uint
/* Index of the histogram currently collecting symbols. */
curr_histogram_ix_ uint
/* Histogram indices of the last ([0]) and second last ([1]) block types. */
last_histogram_ix_ [2]uint
/* Entropies of the last ([0]) and second last ([1]) block types. */
last_entropy_ [2]float64
/* Consecutive merges into the last block; once above 1 the target block
size starts growing. */
merge_last_count_ uint
}
/* initBlockSplitterCommand prepares self to split a stream of num_symbols
   command symbols. It records the tuning parameters, sizes split.types and
   split.lengths for the largest possible number of blocks, sizes *histograms
   (reusing its backing array when it is big enough) and clears the first
   histogram only. */
func initBlockSplitterCommand(self *blockSplitterCommand, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramCommand, histograms_size *uint) {
	maxBlocks := num_symbols/min_block_size + 1

	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	maxTypes := brotli_min_size_t(maxBlocks, maxNumberOfBlockTypes+1)

	self.alphabet_size_ = alphabet_size
	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.split_ = split
	self.histograms_size_ = histograms_size
	self.target_block_size_ = min_block_size
	self.num_blocks_, self.block_size_ = 0, 0
	self.curr_histogram_ix_, self.merge_last_count_ = 0, 0

	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, maxBlocks)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, maxBlocks)
	split.num_blocks = maxBlocks

	*histograms_size = maxTypes
	if histograms != nil && cap(*histograms) >= int(*histograms_size) {
		*histograms = (*histograms)[:*histograms_size]
	} else {
		*histograms = make([]histogramCommand, *histograms_size)
	}
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	histogramClearCommand(&self.histograms_[0])
	self.last_histogram_ix_ = [2]uint{0, 0}
}
/* Finishes the block collected in the current histogram. The very first
   block is always emitted as block type 0. For later blocks it does one of
   three things:
   (1) emits the current block with a new block type;
   (2) emits the current block with the type of the second last block;
   (3) merges the current block with the last block.
   When is_final is set, the in-use histogram count and split.num_blocks are
   written back at the end. */
func blockSplitterFinishBlockCommand(self *blockSplitterCommand, is_final bool) {
	split := self.split_
	histograms := self.histograms_
	lastEntropy := self.last_entropy_[:]

	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)

	switch {
	case self.num_blocks_ == 0:
		/* Create first block. */
		split.lengths[0] = uint32(self.block_size_)
		split.types[0] = 0
		lastEntropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
		lastEntropy[1] = lastEntropy[0]
		self.num_blocks_++
		split.num_types++
		self.curr_histogram_ix_++
		if self.curr_histogram_ix_ < *self.histograms_size_ {
			histogramClearCommand(&histograms[self.curr_histogram_ix_])
		}
		self.block_size_ = 0

	case self.block_size_ > 0:
		cur := self.curr_histogram_ix_
		entropy := bitsEntropy(histograms[cur].data_[:], self.alphabet_size_)

		/* merged[j] is the current histogram combined with that of the last
		   (j == 0) or second last (j == 1) block type; delta[j] is the extra
		   cost of sharing one histogram instead of keeping two. */
		var (
			merged        [2]histogramCommand
			mergedEntropy [2]float64
			delta         [2]float64
		)
		for j := range merged {
			merged[j] = histograms[cur]
			histogramAddHistogramCommand(&merged[j], &histograms[self.last_histogram_ix_[j]])
			mergedEntropy[j] = bitsEntropy(merged[j].data_[0:], self.alphabet_size_)
			delta[j] = mergedEntropy[j] - entropy - lastEntropy[j]
		}

		switch {
		case split.num_types < maxNumberOfBlockTypes && delta[0] > self.split_threshold_ && delta[1] > self.split_threshold_:
			/* Create new block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = byte(split.num_types)
			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = uint(byte(split.num_types))
			lastEntropy[1] = lastEntropy[0]
			lastEntropy[0] = entropy
			self.num_blocks_++
			split.num_types++
			self.curr_histogram_ix_++
			if self.curr_histogram_ix_ < *self.histograms_size_ {
				histogramClearCommand(&histograms[self.curr_histogram_ix_])
			}
			self.block_size_ = 0
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_

		case delta[1] < delta[0]-20.0:
			/* Combine this block with second last block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
			self.last_histogram_ix_[0], self.last_histogram_ix_[1] = self.last_histogram_ix_[1], self.last_histogram_ix_[0]
			histograms[self.last_histogram_ix_[0]] = merged[1]
			lastEntropy[1] = lastEntropy[0]
			lastEntropy[0] = mergedEntropy[1]
			self.num_blocks_++
			self.block_size_ = 0
			histogramClearCommand(&histograms[cur])
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_

		default:
			/* Combine this block with last block. */
			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
			histograms[self.last_histogram_ix_[0]] = merged[0]
			lastEntropy[0] = mergedEntropy[0]
			if split.num_types == 1 {
				lastEntropy[1] = lastEntropy[0]
			}
			self.block_size_ = 0
			histogramClearCommand(&histograms[cur])
			self.merge_last_count_++
			if self.merge_last_count_ > 1 {
				self.target_block_size_ += self.min_block_size_
			}
		}
	}

	if is_final {
		*self.histograms_size_ = split.num_types
		split.num_blocks = self.num_blocks_
	}
}
/* Adds symbol to the current histogram. Once the block has grown to the
   target size, the block is finished (non-final), which decides whether to
   split or merge it. */
func blockSplitterAddSymbolCommand(self *blockSplitterCommand, symbol uint) {
	current := &self.histograms_[self.curr_histogram_ix_]
	histogramAddCommand(current, symbol)

	self.block_size_++
	if self.block_size_ != self.target_block_size_ {
		return
	}
	blockSplitterFinishBlockCommand(self, false /* is_final */)
}
+165
View File
@@ -0,0 +1,165 @@
package brotli
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Greedy block splitter for the distance category: symbols are added one at
a time, and each time the current block reaches the target size it is either
given a new block type, given the type of the second last block, or merged
into the last block. */
type blockSplitterDistance struct {
/* Symbol count passed to bitsEntropy. */
alphabet_size_ uint
/* Lower bound applied to block_size_ when a block is finished; also the
initial target size and the step by which the target grows. */
min_block_size_ uint
/* Both entropy differences must exceed this to open a new block type. */
split_threshold_ float64
/* Number of blocks emitted so far. */
num_blocks_ uint
/* Block split being filled in (types, lengths, num_types, num_blocks). */
split_ *blockSplit
/* Caller-owned histogram storage and a pointer to its in-use count; the
count is set to split.num_types when the final block is finished. */
histograms_ []histogramDistance
histograms_size_ *uint
/* Block size at which AddSymbol triggers FinishBlock. */
target_block_size_ uint
/* Symbols added since the last FinishBlock. */
block_size_ uint
/* Index of the histogram currently collecting symbols. */
curr_histogram_ix_ uint
/* Histogram indices of the last ([0]) and second last ([1]) block types. */
last_histogram_ix_ [2]uint
/* Entropies of the last ([0]) and second last ([1]) block types. */
last_entropy_ [2]float64
/* Consecutive merges into the last block; once above 1 the target block
size starts growing. */
merge_last_count_ uint
}
/* initBlockSplitterDistance prepares self to split a stream of num_symbols
   distance symbols. It records the tuning parameters, sizes split.types and
   split.lengths for the largest possible number of blocks, sizes *histograms
   (reusing its backing array when it is big enough) and clears the first
   histogram only. */
func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
	maxBlocks := num_symbols/min_block_size + 1

	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	maxTypes := brotli_min_size_t(maxBlocks, maxNumberOfBlockTypes+1)

	self.alphabet_size_ = alphabet_size
	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.split_ = split
	self.histograms_size_ = histograms_size
	self.target_block_size_ = min_block_size
	self.num_blocks_, self.block_size_ = 0, 0
	self.curr_histogram_ix_, self.merge_last_count_ = 0, 0

	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, maxBlocks)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, maxBlocks)
	split.num_blocks = maxBlocks

	*histograms_size = maxTypes
	if histograms != nil && cap(*histograms) >= int(*histograms_size) {
		*histograms = (*histograms)[:*histograms_size]
	} else {
		*histograms = make([]histogramDistance, *histograms_size)
	}
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	histogramClearDistance(&self.histograms_[0])
	self.last_histogram_ix_ = [2]uint{0, 0}
}
/* Finishes the block collected in the current histogram. The very first
   block is always emitted as block type 0. For later blocks it does one of
   three things:
   (1) emits the current block with a new block type;
   (2) emits the current block with the type of the second last block;
   (3) merges the current block with the last block.
   When is_final is set, the in-use histogram count and split.num_blocks are
   written back at the end. */
func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
	split := self.split_
	histograms := self.histograms_
	lastEntropy := self.last_entropy_[:]

	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)

	switch {
	case self.num_blocks_ == 0:
		/* Create first block. */
		split.lengths[0] = uint32(self.block_size_)
		split.types[0] = 0
		lastEntropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
		lastEntropy[1] = lastEntropy[0]
		self.num_blocks_++
		split.num_types++
		self.curr_histogram_ix_++
		if self.curr_histogram_ix_ < *self.histograms_size_ {
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
		}
		self.block_size_ = 0

	case self.block_size_ > 0:
		cur := self.curr_histogram_ix_
		entropy := bitsEntropy(histograms[cur].data_[:], self.alphabet_size_)

		/* merged[j] is the current histogram combined with that of the last
		   (j == 0) or second last (j == 1) block type; delta[j] is the extra
		   cost of sharing one histogram instead of keeping two. */
		var (
			merged        [2]histogramDistance
			mergedEntropy [2]float64
			delta         [2]float64
		)
		for j := range merged {
			merged[j] = histograms[cur]
			histogramAddHistogramDistance(&merged[j], &histograms[self.last_histogram_ix_[j]])
			mergedEntropy[j] = bitsEntropy(merged[j].data_[0:], self.alphabet_size_)
			delta[j] = mergedEntropy[j] - entropy - lastEntropy[j]
		}

		switch {
		case split.num_types < maxNumberOfBlockTypes && delta[0] > self.split_threshold_ && delta[1] > self.split_threshold_:
			/* Create new block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = byte(split.num_types)
			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = uint(byte(split.num_types))
			lastEntropy[1] = lastEntropy[0]
			lastEntropy[0] = entropy
			self.num_blocks_++
			split.num_types++
			self.curr_histogram_ix_++
			if self.curr_histogram_ix_ < *self.histograms_size_ {
				histogramClearDistance(&histograms[self.curr_histogram_ix_])
			}
			self.block_size_ = 0
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_

		case delta[1] < delta[0]-20.0:
			/* Combine this block with second last block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
			self.last_histogram_ix_[0], self.last_histogram_ix_[1] = self.last_histogram_ix_[1], self.last_histogram_ix_[0]
			histograms[self.last_histogram_ix_[0]] = merged[1]
			lastEntropy[1] = lastEntropy[0]
			lastEntropy[0] = mergedEntropy[1]
			self.num_blocks_++
			self.block_size_ = 0
			histogramClearDistance(&histograms[cur])
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_

		default:
			/* Combine this block with last block. */
			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
			histograms[self.last_histogram_ix_[0]] = merged[0]
			lastEntropy[0] = mergedEntropy[0]
			if split.num_types == 1 {
				lastEntropy[1] = lastEntropy[0]
			}
			self.block_size_ = 0
			histogramClearDistance(&histograms[cur])
			self.merge_last_count_++
			if self.merge_last_count_ > 1 {
				self.target_block_size_ += self.min_block_size_
			}
		}
	}

	if is_final {
		*self.histograms_size_ = split.num_types
		split.num_blocks = self.num_blocks_
	}
}
/* Adds symbol to the current histogram. Once the block has grown to the
   target size, the block is finished (non-final), which decides whether to
   split or merge it. */
func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
	current := &self.histograms_[self.curr_histogram_ix_]
	histogramAddDistance(current, symbol)

	self.block_size_++
	if self.block_size_ != self.target_block_size_ {
		return
	}
	blockSplitterFinishBlockDistance(self, false /* is_final */)
}
+165
View File
@@ -0,0 +1,165 @@
package brotli
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Greedy block splitter for one block category (literal, command or distance).
*/
/* State of the greedy block splitter for the literal category. It is set up
by initBlockSplitterLiteral, fed one symbol at a time through
blockSplitterAddSymbolLiteral, and writes its result into split_. */
type blockSplitterLiteral struct {
/* Passed to bitsEntropy together with the histogram data. */
alphabet_size_ uint
/* Lower bound applied to block_size_ when a block is finished; also the
initial/reset value and the growth step of target_block_size_. */
min_block_size_ uint
/* A new block type is created only when the entropy cost of merging with
both the last and the second last block exceeds this value. */
split_threshold_ float64
/* Number of blocks emitted so far; index of the next free entry in
split_.lengths and split_.types. */
num_blocks_ uint
/* Block split being filled in. */
split_ *blockSplit
/* Histogram storage shared with the caller; indexed by curr_histogram_ix_
and last_histogram_ix_. */
histograms_ []histogramLiteral
/* Caller-owned histogram count: set to the allocated length during init and
to the final number of block types when the last block is finished. */
histograms_size_ *uint
/* block_size_ value at which adding a symbol triggers finishing the block. */
target_block_size_ uint
/* Number of symbols added to the current histogram since the last finish. */
block_size_ uint
/* Index in histograms_ of the histogram currently being accumulated. */
curr_histogram_ix_ uint
/* Histogram indices of the last ([0]) and second last ([1]) block. */
last_histogram_ix_ [2]uint
/* Entropies belonging to the two histograms in last_histogram_ix_. */
last_entropy_ [2]float64
/* Number of consecutive times the current block was merged into the last
block; reset to 0 whenever a block is emitted. */
merge_last_count_ uint
}
/* Resets self so that it can split a run of num_symbols literals.

   The type and length arrays of split are grown to hold the worst-case
   number of blocks (num_symbols/min_block_size + 1) and split.num_blocks is
   set to that bound. *histograms is resized to
   min(worst-case blocks, maxNumberOfBlockTypes+1) entries, re-using its
   backing array when the capacity suffices, and the same length is stored in
   *histograms_size. Only the first histogram is cleared here. */
func initBlockSplitterLiteral(self *blockSplitterLiteral, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) {
	blockLimit := num_symbols/min_block_size + 1
	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	histogramLimit := brotli_min_size_t(blockLimit, maxNumberOfBlockTypes+1)

	/* Fixed configuration and output locations. */
	self.alphabet_size_ = alphabet_size
	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.split_ = split
	self.histograms_size_ = histograms_size

	/* Running state starts from an empty split. */
	self.num_blocks_ = 0
	self.target_block_size_ = min_block_size
	self.block_size_ = 0
	self.curr_histogram_ix_ = 0
	self.merge_last_count_ = 0

	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, blockLimit)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, blockLimit)
	split.num_blocks = blockLimit

	*histograms_size = histogramLimit
	if histograms != nil && cap(*histograms) >= int(histogramLimit) {
		/* Enough room already: just re-slice the caller's storage. */
		*histograms = (*histograms)[:histogramLimit]
	} else {
		*histograms = make([]histogramLiteral, histogramLimit)
	}
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	histogramClearLiteral(&self.histograms_[0])
	self.last_histogram_ix_ = [2]uint{0, 0}
}
/* Closes the block accumulated in the current histogram and decides how it
enters the split. The very first block is always emitted as type 0. For every
later block it does either of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block.
When is_final is true, the final number of block types is written to
*self.histograms_size_ and the number of blocks to split.num_blocks. */
func blockSplitterFinishBlockLiteral(self *blockSplitterLiteral, is_final bool) {
var split *blockSplit = self.split_
/* Slice over the array in self, so writes below update self.last_entropy_. */
var last_entropy []float64 = self.last_entropy_[:]
var histograms []histogramLiteral = self.histograms_
/* From here on block_size_ is at least min_block_size_. */
self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
if self.num_blocks_ == 0 {
/* Create first block. */
split.lengths[0] = uint32(self.block_size_)
split.types[0] = 0
last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
last_entropy[1] = last_entropy[0]
self.num_blocks_++
split.num_types++
self.curr_histogram_ix_++
/* The next histogram is cleared only if it lies within the allocated count. */
if self.curr_histogram_ix_ < *self.histograms_size_ {
histogramClearLiteral(&histograms[self.curr_histogram_ix_])
}
self.block_size_ = 0
} else if self.block_size_ > 0 {
var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
var combined_histo [2]histogramLiteral
var combined_entropy [2]float64
var diff [2]float64
var j uint
/* j = 0: candidate merge with the last block's histogram;
j = 1: candidate merge with the second last block's histogram.
diff[j] is how much the entropy grows when merging instead of keeping the
current block and that earlier block separate. */
for j = 0; j < 2; j++ {
var last_histogram_ix uint = self.last_histogram_ix_[j]
combined_histo[j] = histograms[self.curr_histogram_ix_]
histogramAddHistogramLiteral(&combined_histo[j], &histograms[last_histogram_ix])
combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
diff[j] = combined_entropy[j] - entropy - last_entropy[j]
}
/* Both merges cost more than the threshold and a block type is still
available: give the block its own type. */
if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
/* Create new block. */
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = byte(split.num_types)
self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = uint(byte(split.num_types))
last_entropy[1] = last_entropy[0]
last_entropy[0] = entropy
self.num_blocks_++
split.num_types++
self.curr_histogram_ix_++
if self.curr_histogram_ix_ < *self.histograms_size_ {
histogramClearLiteral(&histograms[self.curr_histogram_ix_])
}
self.block_size_ = 0
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
/* Merging with the second last block is cheaper than merging with the last
one by more than 20.0: re-use the second last block's type.
NOTE(review): the types[num_blocks_-2] read below needs num_blocks_ >= 2.
That appears to hold because with a single block both last_histogram_ix_
entries and both last_entropy entries are equal, so diff[1] == diff[0]
and this branch is not taken - confirm. */
} else if diff[1] < diff[0]-20.0 {
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
/* Combine this block with second last block. */
/* Swap the two remembered histogram indices: the former second last block
becomes the last one. */
var tmp uint = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
self.last_histogram_ix_[1] = tmp
histograms[self.last_histogram_ix_[0]] = combined_histo[1]
last_entropy[1] = last_entropy[0]
last_entropy[0] = combined_entropy[1]
self.num_blocks_++
self.block_size_ = 0
/* No new type was created, so the current histogram slot is cleared and
re-used for the next block. */
histogramClearLiteral(&histograms[self.curr_histogram_ix_])
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
} else {
/* Combine this block with last block. */
split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
histograms[self.last_histogram_ix_[0]] = combined_histo[0]
last_entropy[0] = combined_entropy[0]
/* With a single block type both entropy slots describe the same histogram. */
if split.num_types == 1 {
last_entropy[1] = last_entropy[0]
}
self.block_size_ = 0
histogramClearLiteral(&histograms[self.curr_histogram_ix_])
self.merge_last_count_++
/* From the second consecutive merge on, the target block size grows by
min_block_size_ per merge. */
if self.merge_last_count_ > 1 {
self.target_block_size_ += self.min_block_size_
}
}
}
if is_final {
/* Report the sizes actually used back to the caller. */
*self.histograms_size_ = split.num_types
split.num_blocks = self.num_blocks_
}
}
/* Records symbol in the histogram of the block that is currently being
   accumulated. As soon as that block has grown to exactly the target block
   size, the (non-final) block is handed to blockSplitterFinishBlockLiteral,
   which decides whether to emit or merge it. */
func blockSplitterAddSymbolLiteral(self *blockSplitterLiteral, symbol uint) {
	current := &self.histograms_[self.curr_histogram_ix_]
	histogramAddLiteral(current, symbol)
	self.block_size_++
	if self.block_size_ != self.target_block_size_ {
		/* Block is not full yet; keep accumulating. */
		return
	}
	blockSplitterFinishBlockLiteral(self, false /* is_final */)
}
+37
View File
@@ -0,0 +1,37 @@
package brotli
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Parameters for the Brotli encoder with chosen quality levels. */
/* Configuration of the hasher, embedded in encoderParams as the "hasher"
field.
NOTE(review): no use of these fields is visible in this file. Going by the
names, type_ presumably selects the hasher implementation, bucket_bits and
block_bits size its tables, hash_len is the number of bytes hashed, and
num_last_distances_to_check limits how many recent distances are tried -
confirm against the hasher code. */
type hasherParams struct {
type_ int
bucket_bits int
block_bits int
hash_len int
num_last_distances_to_check int
}
/* Distance coding configuration, embedded in encoderParams as the "dist"
field.
NOTE(review): no use of these fields is visible in this file.
distance_postfix_bits and num_direct_distance_codes presumably correspond to
the postfix_bits and num_direct_codes arguments of prefixEncodeCopyDistance;
alphabet_size and max_distance presumably describe the resulting distance
alphabet and the largest encodable distance - confirm. */
type distanceParams struct {
distance_postfix_bits uint32
num_direct_distance_codes uint32
alphabet_size uint32
max_distance uint
}
/* Encoding parameters */
/* Groups the encoder settings together with the hasher, distance and
dictionary configuration.
NOTE(review): no use of these fields is visible in this file. lgwin and
lgblock are presumably base-2 logarithms of the window and block sizes, and
size_hint presumably an estimate of the input size - confirm against the
encoder code. */
type encoderParams struct {
mode int
quality int
lgwin uint
lgblock int
size_hint uint
disable_literal_context_modeling bool
large_window bool
hasher hasherParams
dist distanceParams
dictionary encoderDictionary
}
+103
View File
@@ -0,0 +1,103 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Returns a when a < b, and b otherwise. Consequently b is the result for
   ties and whenever either operand is NaN. */
func brotli_min_double(a float64, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns a when a > b, and b otherwise. Consequently b is the result for
   ties and whenever either operand is NaN. */
func brotli_max_double(a float64, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
/* Returns a when a < b, and b otherwise. Consequently b is the result for
   ties and whenever either operand is NaN. */
func brotli_min_float(a float32, b float32) float32 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns a when a > b, and b otherwise. Consequently b is the result for
   ties and whenever either operand is NaN. */
func brotli_max_float(a float32, b float32) float32 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
/* Returns the smaller of the two signed integers a and b. */
func brotli_min_int(a int, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns the larger of the two signed integers a and b. */
func brotli_max_int(a int, b int) int {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
/* Returns the smaller of the two unsigned integers a and b. */
func brotli_min_size_t(a uint, b uint) uint {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns the larger of the two unsigned integers a and b. */
func brotli_max_size_t(a uint, b uint) uint {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
/* Returns the smaller of the two 32-bit unsigned integers a and b. */
func brotli_min_uint32_t(a uint32, b uint32) uint32 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns the larger of the two 32-bit unsigned integers a and b. */
func brotli_max_uint32_t(a uint32, b uint32) uint32 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
/* Returns the smaller of the two bytes a and b. */
func brotli_min_uint8_t(a byte, b byte) byte {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
/* Returns the larger of the two bytes a and b. */
func brotli_max_uint8_t(a byte, b byte) byte {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
+30
View File
@@ -0,0 +1,30 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for encoding integers into prefix codes, the number of extra
bits, and the actual values of the extra bits. */
/* Splits a copy distance into a prefix code and its extra bits.

   Here distance_code is an intermediate code, i.e. one of the special codes
   or the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1.

   Codes below numDistanceShortCodes+num_direct_codes are stored in *code
   unchanged with *extra_bits set to 0. For all other codes, *code receives
   the number of extra bits shifted left by 10, OR-ed with the prefix code
   symbol, and *extra_bits receives the value of those extra bits. */
func prefixEncodeCopyDistance(distance_code uint, num_direct_codes uint, postfix_bits uint, code *uint16, extra_bits *uint32) {
	directLimit := numDistanceShortCodes + num_direct_codes
	if distance_code < directLimit {
		/* Short and direct codes need no extra bits. */
		*code = uint16(distance_code)
		*extra_bits = 0
		return
	}

	dist := (uint(1) << (postfix_bits + 2)) + (distance_code - directLimit)
	bucket := uint(log2FloorNonZero(dist) - 1)
	/* The low postfix_bits bits of dist are carried inside the symbol. */
	postfix := dist & ((uint(1) << postfix_bits) - 1)
	prefix := (dist >> bucket) & 1
	offset := (2 + prefix) << bucket
	nbits := bucket - postfix_bits

	symbol := directLimit + ((2*(nbits-1) + prefix) << postfix_bits) + postfix
	*code = uint16((nbits << 10) | symbol)
	*extra_bits = uint32((dist - offset) >> postfix_bits)
}
+723
View File
@@ -0,0 +1,723 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* One entry of the kCmdLut lookup table, which holds one element per command
symbol (numCommandSymbols entries). kCmdLut initializes its elements
positionally, so the order of the fields below must not change.
NOTE(review): the per-field notes are inferred from the field names and the
table contents, not from visible uses - confirm against the code that reads
kCmdLut. */
type cmdLutElement struct {
/* Presumably the number of extra bits of the insert length. */
insert_len_extra_bits byte
/* Presumably the number of extra bits of the copy length. */
copy_len_extra_bits byte
/* Only the values 0 and -1 occur in the visible part of the table. */
distance_code int8
/* Values 0x00..0x03 occur in the visible part of the table. */
context byte
/* Presumably the base value the insert length extra bits are added to. */
insert_len_offset uint16
/* Presumably the base value the copy length extra bits are added to. */
copy_len_offset uint16
}
var kCmdLut = [numCommandSymbols]cmdLutElement{
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0000, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0000, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0000, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0009},
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0001, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0001, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0001, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0009},
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0002, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0002, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0002, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0009},
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0003, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0003, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0003, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0009},
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0004, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0004, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0004, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0009},
cmdLutElement{0x00, 0x00, 0, 0x00, 0x0005, 0x0002},
cmdLutElement{0x00, 0x00, 0, 0x01, 0x0005, 0x0003},
cmdLutElement{0x00, 0x00, 0, 0x02, 0x0005, 0x0004},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0005},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0006},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0007},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0008},
cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0009},
cmdLutElement{0x01, 0x00, 0, 0x00, 0x0006, 0x0002},
cmdLutElement{0x01, 0x00, 0, 0x01, 0x0006, 0x0003},
cmdLutElement{0x01, 0x00, 0, 0x02, 0x0006, 0x0004},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0005},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0006},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0007},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0008},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0009},
cmdLutElement{0x01, 0x00, 0, 0x00, 0x0008, 0x0002},
cmdLutElement{0x01, 0x00, 0, 0x01, 0x0008, 0x0003},
cmdLutElement{0x01, 0x00, 0, 0x02, 0x0008, 0x0004},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0005},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0006},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0007},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0008},
cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0009},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0000, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0000, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0000, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0000, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0000, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0000, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0000, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0000, 0x0036},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0001, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0001, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0001, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0001, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0001, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0001, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0001, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0001, 0x0036},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0002, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0002, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0002, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0002, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0002, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0002, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0002, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0002, 0x0036},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0003, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0003, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0003, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0003, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0003, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0003, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0003, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0003, 0x0036},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0004, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0004, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0004, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0004, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0004, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0004, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0004, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0004, 0x0036},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0005, 0x000a},
cmdLutElement{0x00, 0x01, 0, 0x03, 0x0005, 0x000c},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0005, 0x000e},
cmdLutElement{0x00, 0x02, 0, 0x03, 0x0005, 0x0012},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0005, 0x0016},
cmdLutElement{0x00, 0x03, 0, 0x03, 0x0005, 0x001e},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0005, 0x0026},
cmdLutElement{0x00, 0x04, 0, 0x03, 0x0005, 0x0036},
cmdLutElement{0x01, 0x01, 0, 0x03, 0x0006, 0x000a},
cmdLutElement{0x01, 0x01, 0, 0x03, 0x0006, 0x000c},
cmdLutElement{0x01, 0x02, 0, 0x03, 0x0006, 0x000e},
cmdLutElement{0x01, 0x02, 0, 0x03, 0x0006, 0x0012},
cmdLutElement{0x01, 0x03, 0, 0x03, 0x0006, 0x0016},
cmdLutElement{0x01, 0x03, 0, 0x03, 0x0006, 0x001e},
cmdLutElement{0x01, 0x04, 0, 0x03, 0x0006, 0x0026},
cmdLutElement{0x01, 0x04, 0, 0x03, 0x0006, 0x0036},
cmdLutElement{0x01, 0x01, 0, 0x03, 0x0008, 0x000a},
cmdLutElement{0x01, 0x01, 0, 0x03, 0x0008, 0x000c},
cmdLutElement{0x01, 0x02, 0, 0x03, 0x0008, 0x000e},
cmdLutElement{0x01, 0x02, 0, 0x03, 0x0008, 0x0012},
cmdLutElement{0x01, 0x03, 0, 0x03, 0x0008, 0x0016},
cmdLutElement{0x01, 0x03, 0, 0x03, 0x0008, 0x001e},
cmdLutElement{0x01, 0x04, 0, 0x03, 0x0008, 0x0026},
cmdLutElement{0x01, 0x04, 0, 0x03, 0x0008, 0x0036},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0000, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0000, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0000, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0009},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0001, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0001, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0001, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0009},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0002, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0002, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0002, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0009},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0003, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0003, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0003, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0009},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0004, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0004, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0004, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0009},
cmdLutElement{0x00, 0x00, -1, 0x00, 0x0005, 0x0002},
cmdLutElement{0x00, 0x00, -1, 0x01, 0x0005, 0x0003},
cmdLutElement{0x00, 0x00, -1, 0x02, 0x0005, 0x0004},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0005},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0006},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0007},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0008},
cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0009},
cmdLutElement{0x01, 0x00, -1, 0x00, 0x0006, 0x0002},
cmdLutElement{0x01, 0x00, -1, 0x01, 0x0006, 0x0003},
cmdLutElement{0x01, 0x00, -1, 0x02, 0x0006, 0x0004},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0005},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0006},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0007},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0008},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0009},
cmdLutElement{0x01, 0x00, -1, 0x00, 0x0008, 0x0002},
cmdLutElement{0x01, 0x00, -1, 0x01, 0x0008, 0x0003},
cmdLutElement{0x01, 0x00, -1, 0x02, 0x0008, 0x0004},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0005},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0006},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0007},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0008},
cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0009},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0000, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0000, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0000, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0000, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0000, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0000, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0000, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0000, 0x0036},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0001, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0001, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0001, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0001, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0001, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0001, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0001, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0001, 0x0036},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0002, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0002, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0002, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0002, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0002, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0002, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0002, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0002, 0x0036},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0003, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0003, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0003, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0003, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0003, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0003, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0003, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0003, 0x0036},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0004, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0004, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0004, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0004, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0004, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0004, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0004, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0004, 0x0036},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0005, 0x000a},
cmdLutElement{0x00, 0x01, -1, 0x03, 0x0005, 0x000c},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0005, 0x000e},
cmdLutElement{0x00, 0x02, -1, 0x03, 0x0005, 0x0012},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0005, 0x0016},
cmdLutElement{0x00, 0x03, -1, 0x03, 0x0005, 0x001e},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0005, 0x0026},
cmdLutElement{0x00, 0x04, -1, 0x03, 0x0005, 0x0036},
cmdLutElement{0x01, 0x01, -1, 0x03, 0x0006, 0x000a},
cmdLutElement{0x01, 0x01, -1, 0x03, 0x0006, 0x000c},
cmdLutElement{0x01, 0x02, -1, 0x03, 0x0006, 0x000e},
cmdLutElement{0x01, 0x02, -1, 0x03, 0x0006, 0x0012},
cmdLutElement{0x01, 0x03, -1, 0x03, 0x0006, 0x0016},
cmdLutElement{0x01, 0x03, -1, 0x03, 0x0006, 0x001e},
cmdLutElement{0x01, 0x04, -1, 0x03, 0x0006, 0x0026},
cmdLutElement{0x01, 0x04, -1, 0x03, 0x0006, 0x0036},
cmdLutElement{0x01, 0x01, -1, 0x03, 0x0008, 0x000a},
cmdLutElement{0x01, 0x01, -1, 0x03, 0x0008, 0x000c},
cmdLutElement{0x01, 0x02, -1, 0x03, 0x0008, 0x000e},
cmdLutElement{0x01, 0x02, -1, 0x03, 0x0008, 0x0012},
cmdLutElement{0x01, 0x03, -1, 0x03, 0x0008, 0x0016},
cmdLutElement{0x01, 0x03, -1, 0x03, 0x0008, 0x001e},
cmdLutElement{0x01, 0x04, -1, 0x03, 0x0008, 0x0026},
cmdLutElement{0x01, 0x04, -1, 0x03, 0x0008, 0x0036},
cmdLutElement{0x02, 0x00, -1, 0x00, 0x000a, 0x0002},
cmdLutElement{0x02, 0x00, -1, 0x01, 0x000a, 0x0003},
cmdLutElement{0x02, 0x00, -1, 0x02, 0x000a, 0x0004},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0005},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0006},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0007},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0008},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0009},
cmdLutElement{0x02, 0x00, -1, 0x00, 0x000e, 0x0002},
cmdLutElement{0x02, 0x00, -1, 0x01, 0x000e, 0x0003},
cmdLutElement{0x02, 0x00, -1, 0x02, 0x000e, 0x0004},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0005},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0006},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0007},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0008},
cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0009},
cmdLutElement{0x03, 0x00, -1, 0x00, 0x0012, 0x0002},
cmdLutElement{0x03, 0x00, -1, 0x01, 0x0012, 0x0003},
cmdLutElement{0x03, 0x00, -1, 0x02, 0x0012, 0x0004},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0005},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0006},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0007},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0008},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0009},
cmdLutElement{0x03, 0x00, -1, 0x00, 0x001a, 0x0002},
cmdLutElement{0x03, 0x00, -1, 0x01, 0x001a, 0x0003},
cmdLutElement{0x03, 0x00, -1, 0x02, 0x001a, 0x0004},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0005},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0006},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0007},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0008},
cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0009},
cmdLutElement{0x04, 0x00, -1, 0x00, 0x0022, 0x0002},
cmdLutElement{0x04, 0x00, -1, 0x01, 0x0022, 0x0003},
cmdLutElement{0x04, 0x00, -1, 0x02, 0x0022, 0x0004},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0005},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0006},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0007},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0008},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0009},
cmdLutElement{0x04, 0x00, -1, 0x00, 0x0032, 0x0002},
cmdLutElement{0x04, 0x00, -1, 0x01, 0x0032, 0x0003},
cmdLutElement{0x04, 0x00, -1, 0x02, 0x0032, 0x0004},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0005},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0006},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0007},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0008},
cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0009},
cmdLutElement{0x05, 0x00, -1, 0x00, 0x0042, 0x0002},
cmdLutElement{0x05, 0x00, -1, 0x01, 0x0042, 0x0003},
cmdLutElement{0x05, 0x00, -1, 0x02, 0x0042, 0x0004},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0005},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0006},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0007},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0008},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0009},
cmdLutElement{0x05, 0x00, -1, 0x00, 0x0062, 0x0002},
cmdLutElement{0x05, 0x00, -1, 0x01, 0x0062, 0x0003},
cmdLutElement{0x05, 0x00, -1, 0x02, 0x0062, 0x0004},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0005},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0006},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0007},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0008},
cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0009},
cmdLutElement{0x02, 0x01, -1, 0x03, 0x000a, 0x000a},
cmdLutElement{0x02, 0x01, -1, 0x03, 0x000a, 0x000c},
cmdLutElement{0x02, 0x02, -1, 0x03, 0x000a, 0x000e},
cmdLutElement{0x02, 0x02, -1, 0x03, 0x000a, 0x0012},
cmdLutElement{0x02, 0x03, -1, 0x03, 0x000a, 0x0016},
cmdLutElement{0x02, 0x03, -1, 0x03, 0x000a, 0x001e},
cmdLutElement{0x02, 0x04, -1, 0x03, 0x000a, 0x0026},
cmdLutElement{0x02, 0x04, -1, 0x03, 0x000a, 0x0036},
cmdLutElement{0x02, 0x01, -1, 0x03, 0x000e, 0x000a},
cmdLutElement{0x02, 0x01, -1, 0x03, 0x000e, 0x000c},
cmdLutElement{0x02, 0x02, -1, 0x03, 0x000e, 0x000e},
cmdLutElement{0x02, 0x02, -1, 0x03, 0x000e, 0x0012},
cmdLutElement{0x02, 0x03, -1, 0x03, 0x000e, 0x0016},
cmdLutElement{0x02, 0x03, -1, 0x03, 0x000e, 0x001e},
cmdLutElement{0x02, 0x04, -1, 0x03, 0x000e, 0x0026},
cmdLutElement{0x02, 0x04, -1, 0x03, 0x000e, 0x0036},
cmdLutElement{0x03, 0x01, -1, 0x03, 0x0012, 0x000a},
cmdLutElement{0x03, 0x01, -1, 0x03, 0x0012, 0x000c},
cmdLutElement{0x03, 0x02, -1, 0x03, 0x0012, 0x000e},
cmdLutElement{0x03, 0x02, -1, 0x03, 0x0012, 0x0012},
cmdLutElement{0x03, 0x03, -1, 0x03, 0x0012, 0x0016},
cmdLutElement{0x03, 0x03, -1, 0x03, 0x0012, 0x001e},
cmdLutElement{0x03, 0x04, -1, 0x03, 0x0012, 0x0026},
cmdLutElement{0x03, 0x04, -1, 0x03, 0x0012, 0x0036},
cmdLutElement{0x03, 0x01, -1, 0x03, 0x001a, 0x000a},
cmdLutElement{0x03, 0x01, -1, 0x03, 0x001a, 0x000c},
cmdLutElement{0x03, 0x02, -1, 0x03, 0x001a, 0x000e},
cmdLutElement{0x03, 0x02, -1, 0x03, 0x001a, 0x0012},
cmdLutElement{0x03, 0x03, -1, 0x03, 0x001a, 0x0016},
cmdLutElement{0x03, 0x03, -1, 0x03, 0x001a, 0x001e},
cmdLutElement{0x03, 0x04, -1, 0x03, 0x001a, 0x0026},
cmdLutElement{0x03, 0x04, -1, 0x03, 0x001a, 0x0036},
cmdLutElement{0x04, 0x01, -1, 0x03, 0x0022, 0x000a},
cmdLutElement{0x04, 0x01, -1, 0x03, 0x0022, 0x000c},
cmdLutElement{0x04, 0x02, -1, 0x03, 0x0022, 0x000e},
cmdLutElement{0x04, 0x02, -1, 0x03, 0x0022, 0x0012},
cmdLutElement{0x04, 0x03, -1, 0x03, 0x0022, 0x0016},
cmdLutElement{0x04, 0x03, -1, 0x03, 0x0022, 0x001e},
cmdLutElement{0x04, 0x04, -1, 0x03, 0x0022, 0x0026},
cmdLutElement{0x04, 0x04, -1, 0x03, 0x0022, 0x0036},
cmdLutElement{0x04, 0x01, -1, 0x03, 0x0032, 0x000a},
cmdLutElement{0x04, 0x01, -1, 0x03, 0x0032, 0x000c},
cmdLutElement{0x04, 0x02, -1, 0x03, 0x0032, 0x000e},
cmdLutElement{0x04, 0x02, -1, 0x03, 0x0032, 0x0012},
cmdLutElement{0x04, 0x03, -1, 0x03, 0x0032, 0x0016},
cmdLutElement{0x04, 0x03, -1, 0x03, 0x0032, 0x001e},
cmdLutElement{0x04, 0x04, -1, 0x03, 0x0032, 0x0026},
cmdLutElement{0x04, 0x04, -1, 0x03, 0x0032, 0x0036},
cmdLutElement{0x05, 0x01, -1, 0x03, 0x0042, 0x000a},
cmdLutElement{0x05, 0x01, -1, 0x03, 0x0042, 0x000c},
cmdLutElement{0x05, 0x02, -1, 0x03, 0x0042, 0x000e},
cmdLutElement{0x05, 0x02, -1, 0x03, 0x0042, 0x0012},
cmdLutElement{0x05, 0x03, -1, 0x03, 0x0042, 0x0016},
cmdLutElement{0x05, 0x03, -1, 0x03, 0x0042, 0x001e},
cmdLutElement{0x05, 0x04, -1, 0x03, 0x0042, 0x0026},
cmdLutElement{0x05, 0x04, -1, 0x03, 0x0042, 0x0036},
cmdLutElement{0x05, 0x01, -1, 0x03, 0x0062, 0x000a},
cmdLutElement{0x05, 0x01, -1, 0x03, 0x0062, 0x000c},
cmdLutElement{0x05, 0x02, -1, 0x03, 0x0062, 0x000e},
cmdLutElement{0x05, 0x02, -1, 0x03, 0x0062, 0x0012},
cmdLutElement{0x05, 0x03, -1, 0x03, 0x0062, 0x0016},
cmdLutElement{0x05, 0x03, -1, 0x03, 0x0062, 0x001e},
cmdLutElement{0x05, 0x04, -1, 0x03, 0x0062, 0x0026},
cmdLutElement{0x05, 0x04, -1, 0x03, 0x0062, 0x0036},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0000, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0000, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0000, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0000, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0000, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0000, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0000, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0000, 0x0846},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0001, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0001, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0001, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0001, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0001, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0001, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0001, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0001, 0x0846},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0002, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0002, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0002, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0002, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0002, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0002, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0002, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0002, 0x0846},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0003, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0003, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0003, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0003, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0003, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0003, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0003, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0003, 0x0846},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0004, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0004, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0004, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0004, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0004, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0004, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0004, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0004, 0x0846},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0005, 0x0046},
cmdLutElement{0x00, 0x05, -1, 0x03, 0x0005, 0x0066},
cmdLutElement{0x00, 0x06, -1, 0x03, 0x0005, 0x0086},
cmdLutElement{0x00, 0x07, -1, 0x03, 0x0005, 0x00c6},
cmdLutElement{0x00, 0x08, -1, 0x03, 0x0005, 0x0146},
cmdLutElement{0x00, 0x09, -1, 0x03, 0x0005, 0x0246},
cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0005, 0x0446},
cmdLutElement{0x00, 0x18, -1, 0x03, 0x0005, 0x0846},
cmdLutElement{0x01, 0x05, -1, 0x03, 0x0006, 0x0046},
cmdLutElement{0x01, 0x05, -1, 0x03, 0x0006, 0x0066},
cmdLutElement{0x01, 0x06, -1, 0x03, 0x0006, 0x0086},
cmdLutElement{0x01, 0x07, -1, 0x03, 0x0006, 0x00c6},
cmdLutElement{0x01, 0x08, -1, 0x03, 0x0006, 0x0146},
cmdLutElement{0x01, 0x09, -1, 0x03, 0x0006, 0x0246},
cmdLutElement{0x01, 0x0a, -1, 0x03, 0x0006, 0x0446},
cmdLutElement{0x01, 0x18, -1, 0x03, 0x0006, 0x0846},
cmdLutElement{0x01, 0x05, -1, 0x03, 0x0008, 0x0046},
cmdLutElement{0x01, 0x05, -1, 0x03, 0x0008, 0x0066},
cmdLutElement{0x01, 0x06, -1, 0x03, 0x0008, 0x0086},
cmdLutElement{0x01, 0x07, -1, 0x03, 0x0008, 0x00c6},
cmdLutElement{0x01, 0x08, -1, 0x03, 0x0008, 0x0146},
cmdLutElement{0x01, 0x09, -1, 0x03, 0x0008, 0x0246},
cmdLutElement{0x01, 0x0a, -1, 0x03, 0x0008, 0x0446},
cmdLutElement{0x01, 0x18, -1, 0x03, 0x0008, 0x0846},
cmdLutElement{0x06, 0x00, -1, 0x00, 0x0082, 0x0002},
cmdLutElement{0x06, 0x00, -1, 0x01, 0x0082, 0x0003},
cmdLutElement{0x06, 0x00, -1, 0x02, 0x0082, 0x0004},
cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0005},
cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0006},
cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0007},
cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0008},
cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0009},
cmdLutElement{0x07, 0x00, -1, 0x00, 0x00c2, 0x0002},
cmdLutElement{0x07, 0x00, -1, 0x01, 0x00c2, 0x0003},
cmdLutElement{0x07, 0x00, -1, 0x02, 0x00c2, 0x0004},
cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0005},
cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0006},
cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0007},
cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0008},
cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0009},
cmdLutElement{0x08, 0x00, -1, 0x00, 0x0142, 0x0002},
cmdLutElement{0x08, 0x00, -1, 0x01, 0x0142, 0x0003},
cmdLutElement{0x08, 0x00, -1, 0x02, 0x0142, 0x0004},
cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0005},
cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0006},
cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0007},
cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0008},
cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0009},
cmdLutElement{0x09, 0x00, -1, 0x00, 0x0242, 0x0002},
cmdLutElement{0x09, 0x00, -1, 0x01, 0x0242, 0x0003},
cmdLutElement{0x09, 0x00, -1, 0x02, 0x0242, 0x0004},
cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0005},
cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0006},
cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0007},
cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0008},
cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0009},
cmdLutElement{0x0a, 0x00, -1, 0x00, 0x0442, 0x0002},
cmdLutElement{0x0a, 0x00, -1, 0x01, 0x0442, 0x0003},
cmdLutElement{0x0a, 0x00, -1, 0x02, 0x0442, 0x0004},
cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0005},
cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0006},
cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0007},
cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0008},
cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0009},
cmdLutElement{0x0c, 0x00, -1, 0x00, 0x0842, 0x0002},
cmdLutElement{0x0c, 0x00, -1, 0x01, 0x0842, 0x0003},
cmdLutElement{0x0c, 0x00, -1, 0x02, 0x0842, 0x0004},
cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0005},
cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0006},
cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0007},
cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0008},
cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0009},
cmdLutElement{0x0e, 0x00, -1, 0x00, 0x1842, 0x0002},
cmdLutElement{0x0e, 0x00, -1, 0x01, 0x1842, 0x0003},
cmdLutElement{0x0e, 0x00, -1, 0x02, 0x1842, 0x0004},
cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0005},
cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0006},
cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0007},
cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0008},
cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0009},
cmdLutElement{0x18, 0x00, -1, 0x00, 0x5842, 0x0002},
cmdLutElement{0x18, 0x00, -1, 0x01, 0x5842, 0x0003},
cmdLutElement{0x18, 0x00, -1, 0x02, 0x5842, 0x0004},
cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0005},
cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0006},
cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0007},
cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0008},
cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0009},
cmdLutElement{0x02, 0x05, -1, 0x03, 0x000a, 0x0046},
cmdLutElement{0x02, 0x05, -1, 0x03, 0x000a, 0x0066},
cmdLutElement{0x02, 0x06, -1, 0x03, 0x000a, 0x0086},
cmdLutElement{0x02, 0x07, -1, 0x03, 0x000a, 0x00c6},
cmdLutElement{0x02, 0x08, -1, 0x03, 0x000a, 0x0146},
cmdLutElement{0x02, 0x09, -1, 0x03, 0x000a, 0x0246},
cmdLutElement{0x02, 0x0a, -1, 0x03, 0x000a, 0x0446},
cmdLutElement{0x02, 0x18, -1, 0x03, 0x000a, 0x0846},
cmdLutElement{0x02, 0x05, -1, 0x03, 0x000e, 0x0046},
cmdLutElement{0x02, 0x05, -1, 0x03, 0x000e, 0x0066},
cmdLutElement{0x02, 0x06, -1, 0x03, 0x000e, 0x0086},
cmdLutElement{0x02, 0x07, -1, 0x03, 0x000e, 0x00c6},
cmdLutElement{0x02, 0x08, -1, 0x03, 0x000e, 0x0146},
cmdLutElement{0x02, 0x09, -1, 0x03, 0x000e, 0x0246},
cmdLutElement{0x02, 0x0a, -1, 0x03, 0x000e, 0x0446},
cmdLutElement{0x02, 0x18, -1, 0x03, 0x000e, 0x0846},
cmdLutElement{0x03, 0x05, -1, 0x03, 0x0012, 0x0046},
cmdLutElement{0x03, 0x05, -1, 0x03, 0x0012, 0x0066},
cmdLutElement{0x03, 0x06, -1, 0x03, 0x0012, 0x0086},
cmdLutElement{0x03, 0x07, -1, 0x03, 0x0012, 0x00c6},
cmdLutElement{0x03, 0x08, -1, 0x03, 0x0012, 0x0146},
cmdLutElement{0x03, 0x09, -1, 0x03, 0x0012, 0x0246},
cmdLutElement{0x03, 0x0a, -1, 0x03, 0x0012, 0x0446},
cmdLutElement{0x03, 0x18, -1, 0x03, 0x0012, 0x0846},
cmdLutElement{0x03, 0x05, -1, 0x03, 0x001a, 0x0046},
cmdLutElement{0x03, 0x05, -1, 0x03, 0x001a, 0x0066},
cmdLutElement{0x03, 0x06, -1, 0x03, 0x001a, 0x0086},
cmdLutElement{0x03, 0x07, -1, 0x03, 0x001a, 0x00c6},
cmdLutElement{0x03, 0x08, -1, 0x03, 0x001a, 0x0146},
cmdLutElement{0x03, 0x09, -1, 0x03, 0x001a, 0x0246},
cmdLutElement{0x03, 0x0a, -1, 0x03, 0x001a, 0x0446},
cmdLutElement{0x03, 0x18, -1, 0x03, 0x001a, 0x0846},
cmdLutElement{0x04, 0x05, -1, 0x03, 0x0022, 0x0046},
cmdLutElement{0x04, 0x05, -1, 0x03, 0x0022, 0x0066},
cmdLutElement{0x04, 0x06, -1, 0x03, 0x0022, 0x0086},
cmdLutElement{0x04, 0x07, -1, 0x03, 0x0022, 0x00c6},
cmdLutElement{0x04, 0x08, -1, 0x03, 0x0022, 0x0146},
cmdLutElement{0x04, 0x09, -1, 0x03, 0x0022, 0x0246},
cmdLutElement{0x04, 0x0a, -1, 0x03, 0x0022, 0x0446},
cmdLutElement{0x04, 0x18, -1, 0x03, 0x0022, 0x0846},
cmdLutElement{0x04, 0x05, -1, 0x03, 0x0032, 0x0046},
cmdLutElement{0x04, 0x05, -1, 0x03, 0x0032, 0x0066},
cmdLutElement{0x04, 0x06, -1, 0x03, 0x0032, 0x0086},
cmdLutElement{0x04, 0x07, -1, 0x03, 0x0032, 0x00c6},
cmdLutElement{0x04, 0x08, -1, 0x03, 0x0032, 0x0146},
cmdLutElement{0x04, 0x09, -1, 0x03, 0x0032, 0x0246},
cmdLutElement{0x04, 0x0a, -1, 0x03, 0x0032, 0x0446},
cmdLutElement{0x04, 0x18, -1, 0x03, 0x0032, 0x0846},
cmdLutElement{0x05, 0x05, -1, 0x03, 0x0042, 0x0046},
cmdLutElement{0x05, 0x05, -1, 0x03, 0x0042, 0x0066},
cmdLutElement{0x05, 0x06, -1, 0x03, 0x0042, 0x0086},
cmdLutElement{0x05, 0x07, -1, 0x03, 0x0042, 0x00c6},
cmdLutElement{0x05, 0x08, -1, 0x03, 0x0042, 0x0146},
cmdLutElement{0x05, 0x09, -1, 0x03, 0x0042, 0x0246},
cmdLutElement{0x05, 0x0a, -1, 0x03, 0x0042, 0x0446},
cmdLutElement{0x05, 0x18, -1, 0x03, 0x0042, 0x0846},
cmdLutElement{0x05, 0x05, -1, 0x03, 0x0062, 0x0046},
cmdLutElement{0x05, 0x05, -1, 0x03, 0x0062, 0x0066},
cmdLutElement{0x05, 0x06, -1, 0x03, 0x0062, 0x0086},
cmdLutElement{0x05, 0x07, -1, 0x03, 0x0062, 0x00c6},
cmdLutElement{0x05, 0x08, -1, 0x03, 0x0062, 0x0146},
cmdLutElement{0x05, 0x09, -1, 0x03, 0x0062, 0x0246},
cmdLutElement{0x05, 0x0a, -1, 0x03, 0x0062, 0x0446},
cmdLutElement{0x05, 0x18, -1, 0x03, 0x0062, 0x0846},
cmdLutElement{0x06, 0x01, -1, 0x03, 0x0082, 0x000a},
cmdLutElement{0x06, 0x01, -1, 0x03, 0x0082, 0x000c},
cmdLutElement{0x06, 0x02, -1, 0x03, 0x0082, 0x000e},
cmdLutElement{0x06, 0x02, -1, 0x03, 0x0082, 0x0012},
cmdLutElement{0x06, 0x03, -1, 0x03, 0x0082, 0x0016},
cmdLutElement{0x06, 0x03, -1, 0x03, 0x0082, 0x001e},
cmdLutElement{0x06, 0x04, -1, 0x03, 0x0082, 0x0026},
cmdLutElement{0x06, 0x04, -1, 0x03, 0x0082, 0x0036},
cmdLutElement{0x07, 0x01, -1, 0x03, 0x00c2, 0x000a},
cmdLutElement{0x07, 0x01, -1, 0x03, 0x00c2, 0x000c},
cmdLutElement{0x07, 0x02, -1, 0x03, 0x00c2, 0x000e},
cmdLutElement{0x07, 0x02, -1, 0x03, 0x00c2, 0x0012},
cmdLutElement{0x07, 0x03, -1, 0x03, 0x00c2, 0x0016},
cmdLutElement{0x07, 0x03, -1, 0x03, 0x00c2, 0x001e},
cmdLutElement{0x07, 0x04, -1, 0x03, 0x00c2, 0x0026},
cmdLutElement{0x07, 0x04, -1, 0x03, 0x00c2, 0x0036},
cmdLutElement{0x08, 0x01, -1, 0x03, 0x0142, 0x000a},
cmdLutElement{0x08, 0x01, -1, 0x03, 0x0142, 0x000c},
cmdLutElement{0x08, 0x02, -1, 0x03, 0x0142, 0x000e},
cmdLutElement{0x08, 0x02, -1, 0x03, 0x0142, 0x0012},
cmdLutElement{0x08, 0x03, -1, 0x03, 0x0142, 0x0016},
cmdLutElement{0x08, 0x03, -1, 0x03, 0x0142, 0x001e},
cmdLutElement{0x08, 0x04, -1, 0x03, 0x0142, 0x0026},
cmdLutElement{0x08, 0x04, -1, 0x03, 0x0142, 0x0036},
cmdLutElement{0x09, 0x01, -1, 0x03, 0x0242, 0x000a},
cmdLutElement{0x09, 0x01, -1, 0x03, 0x0242, 0x000c},
cmdLutElement{0x09, 0x02, -1, 0x03, 0x0242, 0x000e},
cmdLutElement{0x09, 0x02, -1, 0x03, 0x0242, 0x0012},
cmdLutElement{0x09, 0x03, -1, 0x03, 0x0242, 0x0016},
cmdLutElement{0x09, 0x03, -1, 0x03, 0x0242, 0x001e},
cmdLutElement{0x09, 0x04, -1, 0x03, 0x0242, 0x0026},
cmdLutElement{0x09, 0x04, -1, 0x03, 0x0242, 0x0036},
cmdLutElement{0x0a, 0x01, -1, 0x03, 0x0442, 0x000a},
cmdLutElement{0x0a, 0x01, -1, 0x03, 0x0442, 0x000c},
cmdLutElement{0x0a, 0x02, -1, 0x03, 0x0442, 0x000e},
cmdLutElement{0x0a, 0x02, -1, 0x03, 0x0442, 0x0012},
cmdLutElement{0x0a, 0x03, -1, 0x03, 0x0442, 0x0016},
cmdLutElement{0x0a, 0x03, -1, 0x03, 0x0442, 0x001e},
cmdLutElement{0x0a, 0x04, -1, 0x03, 0x0442, 0x0026},
cmdLutElement{0x0a, 0x04, -1, 0x03, 0x0442, 0x0036},
cmdLutElement{0x0c, 0x01, -1, 0x03, 0x0842, 0x000a},
cmdLutElement{0x0c, 0x01, -1, 0x03, 0x0842, 0x000c},
cmdLutElement{0x0c, 0x02, -1, 0x03, 0x0842, 0x000e},
cmdLutElement{0x0c, 0x02, -1, 0x03, 0x0842, 0x0012},
cmdLutElement{0x0c, 0x03, -1, 0x03, 0x0842, 0x0016},
cmdLutElement{0x0c, 0x03, -1, 0x03, 0x0842, 0x001e},
cmdLutElement{0x0c, 0x04, -1, 0x03, 0x0842, 0x0026},
cmdLutElement{0x0c, 0x04, -1, 0x03, 0x0842, 0x0036},
cmdLutElement{0x0e, 0x01, -1, 0x03, 0x1842, 0x000a},
cmdLutElement{0x0e, 0x01, -1, 0x03, 0x1842, 0x000c},
cmdLutElement{0x0e, 0x02, -1, 0x03, 0x1842, 0x000e},
cmdLutElement{0x0e, 0x02, -1, 0x03, 0x1842, 0x0012},
cmdLutElement{0x0e, 0x03, -1, 0x03, 0x1842, 0x0016},
cmdLutElement{0x0e, 0x03, -1, 0x03, 0x1842, 0x001e},
cmdLutElement{0x0e, 0x04, -1, 0x03, 0x1842, 0x0026},
cmdLutElement{0x0e, 0x04, -1, 0x03, 0x1842, 0x0036},
cmdLutElement{0x18, 0x01, -1, 0x03, 0x5842, 0x000a},
cmdLutElement{0x18, 0x01, -1, 0x03, 0x5842, 0x000c},
cmdLutElement{0x18, 0x02, -1, 0x03, 0x5842, 0x000e},
cmdLutElement{0x18, 0x02, -1, 0x03, 0x5842, 0x0012},
cmdLutElement{0x18, 0x03, -1, 0x03, 0x5842, 0x0016},
cmdLutElement{0x18, 0x03, -1, 0x03, 0x5842, 0x001e},
cmdLutElement{0x18, 0x04, -1, 0x03, 0x5842, 0x0026},
cmdLutElement{0x18, 0x04, -1, 0x03, 0x5842, 0x0036},
cmdLutElement{0x06, 0x05, -1, 0x03, 0x0082, 0x0046},
cmdLutElement{0x06, 0x05, -1, 0x03, 0x0082, 0x0066},
cmdLutElement{0x06, 0x06, -1, 0x03, 0x0082, 0x0086},
cmdLutElement{0x06, 0x07, -1, 0x03, 0x0082, 0x00c6},
cmdLutElement{0x06, 0x08, -1, 0x03, 0x0082, 0x0146},
cmdLutElement{0x06, 0x09, -1, 0x03, 0x0082, 0x0246},
cmdLutElement{0x06, 0x0a, -1, 0x03, 0x0082, 0x0446},
cmdLutElement{0x06, 0x18, -1, 0x03, 0x0082, 0x0846},
cmdLutElement{0x07, 0x05, -1, 0x03, 0x00c2, 0x0046},
cmdLutElement{0x07, 0x05, -1, 0x03, 0x00c2, 0x0066},
cmdLutElement{0x07, 0x06, -1, 0x03, 0x00c2, 0x0086},
cmdLutElement{0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6},
cmdLutElement{0x07, 0x08, -1, 0x03, 0x00c2, 0x0146},
cmdLutElement{0x07, 0x09, -1, 0x03, 0x00c2, 0x0246},
cmdLutElement{0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446},
cmdLutElement{0x07, 0x18, -1, 0x03, 0x00c2, 0x0846},
cmdLutElement{0x08, 0x05, -1, 0x03, 0x0142, 0x0046},
cmdLutElement{0x08, 0x05, -1, 0x03, 0x0142, 0x0066},
cmdLutElement{0x08, 0x06, -1, 0x03, 0x0142, 0x0086},
cmdLutElement{0x08, 0x07, -1, 0x03, 0x0142, 0x00c6},
cmdLutElement{0x08, 0x08, -1, 0x03, 0x0142, 0x0146},
cmdLutElement{0x08, 0x09, -1, 0x03, 0x0142, 0x0246},
cmdLutElement{0x08, 0x0a, -1, 0x03, 0x0142, 0x0446},
cmdLutElement{0x08, 0x18, -1, 0x03, 0x0142, 0x0846},
cmdLutElement{0x09, 0x05, -1, 0x03, 0x0242, 0x0046},
cmdLutElement{0x09, 0x05, -1, 0x03, 0x0242, 0x0066},
cmdLutElement{0x09, 0x06, -1, 0x03, 0x0242, 0x0086},
cmdLutElement{0x09, 0x07, -1, 0x03, 0x0242, 0x00c6},
cmdLutElement{0x09, 0x08, -1, 0x03, 0x0242, 0x0146},
cmdLutElement{0x09, 0x09, -1, 0x03, 0x0242, 0x0246},
cmdLutElement{0x09, 0x0a, -1, 0x03, 0x0242, 0x0446},
cmdLutElement{0x09, 0x18, -1, 0x03, 0x0242, 0x0846},
cmdLutElement{0x0a, 0x05, -1, 0x03, 0x0442, 0x0046},
cmdLutElement{0x0a, 0x05, -1, 0x03, 0x0442, 0x0066},
cmdLutElement{0x0a, 0x06, -1, 0x03, 0x0442, 0x0086},
cmdLutElement{0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6},
cmdLutElement{0x0a, 0x08, -1, 0x03, 0x0442, 0x0146},
cmdLutElement{0x0a, 0x09, -1, 0x03, 0x0442, 0x0246},
cmdLutElement{0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446},
cmdLutElement{0x0a, 0x18, -1, 0x03, 0x0442, 0x0846},
cmdLutElement{0x0c, 0x05, -1, 0x03, 0x0842, 0x0046},
cmdLutElement{0x0c, 0x05, -1, 0x03, 0x0842, 0x0066},
cmdLutElement{0x0c, 0x06, -1, 0x03, 0x0842, 0x0086},
cmdLutElement{0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6},
cmdLutElement{0x0c, 0x08, -1, 0x03, 0x0842, 0x0146},
cmdLutElement{0x0c, 0x09, -1, 0x03, 0x0842, 0x0246},
cmdLutElement{0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446},
cmdLutElement{0x0c, 0x18, -1, 0x03, 0x0842, 0x0846},
cmdLutElement{0x0e, 0x05, -1, 0x03, 0x1842, 0x0046},
cmdLutElement{0x0e, 0x05, -1, 0x03, 0x1842, 0x0066},
cmdLutElement{0x0e, 0x06, -1, 0x03, 0x1842, 0x0086},
cmdLutElement{0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6},
cmdLutElement{0x0e, 0x08, -1, 0x03, 0x1842, 0x0146},
cmdLutElement{0x0e, 0x09, -1, 0x03, 0x1842, 0x0246},
cmdLutElement{0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446},
cmdLutElement{0x0e, 0x18, -1, 0x03, 0x1842, 0x0846},
cmdLutElement{0x18, 0x05, -1, 0x03, 0x5842, 0x0046},
cmdLutElement{0x18, 0x05, -1, 0x03, 0x5842, 0x0066},
cmdLutElement{0x18, 0x06, -1, 0x03, 0x5842, 0x0086},
cmdLutElement{0x18, 0x07, -1, 0x03, 0x5842, 0x00c6},
cmdLutElement{0x18, 0x08, -1, 0x03, 0x5842, 0x0146},
cmdLutElement{0x18, 0x09, -1, 0x03, 0x5842, 0x0246},
cmdLutElement{0x18, 0x0a, -1, 0x03, 0x5842, 0x0446},
cmdLutElement{0x18, 0x18, -1, 0x03, 0x5842, 0x0846},
}
+196
View File
@@ -0,0 +1,196 @@
package brotli
/* Named quality levels and the quality thresholds that gate encoder
   features. */
const (
	/* Named quality levels. */
	fastOnePassCompressionQuality = 0
	fastTwoPassCompressionQuality = 1
	zopflificationQuality         = 10
	hqZopflificationQuality       = 11

	/* Quality thresholds for individual features. */
	maxQualityForStaticEntropyCodes       = 2
	minQualityForBlockSplit               = 4
	minQualityForNonzeroDistanceParams    = 4
	minQualityForOptimizeHistograms       = 4
	minQualityForExtensiveReferenceSearch = 5
	minQualityForContextModeling          = 5
	minQualityForHqContextModeling        = 7
	minQualityForHqBlockSplitting         = 10

	/* For quality below minQualityForBlockSplit there is no block splitting,
	   so at most this many literals and commands are buffered. */
	maxNumDelayedSymbols = 0x2FFF
)
/* maxHashTableSize returns the hash-table size for quality levels 0 and 1:
   1<<15 for the one-pass quality, 1<<17 for any other value. */
func maxHashTableSize(quality int) uint {
	var lgSize uint = 17
	if quality == fastOnePassCompressionQuality {
		lgSize = 15
	}
	return uint(1) << lgSize
}
/* The maximum length for which the zopflification uses distinct distances. */
const maxZopfliLenQuality10 = 150
const maxZopfliLenQuality11 = 325
/* Do not thoroughly search when a long copy is found. */
const longCopyQuickStep = 16384
/* maxZopfliLen returns maxZopfliLenQuality10 for quality 10 and below, and
   maxZopfliLenQuality11 above that. */
func maxZopfliLen(params *encoderParams) uint {
	if params.quality > 10 {
		return maxZopfliLenQuality11
	}
	return maxZopfliLenQuality10
}
/* maxZopfliCandidates returns how many of the best candidates are evaluated
   to expand a Zopfli chain: 1 up to quality 10, 5 above it. */
func maxZopfliCandidates(params *encoderParams) uint {
	if params.quality > 10 {
		return 5
	}
	return 1
}
/* sanitizeParams forces the encoder parameters into their supported ranges,
   in place: quality is limited to [minQuality, maxQuality], large_window is
   switched off for qualities that use static entropy codes, and lgwin is
   limited to the window range allowed for the (possibly large) window mode. */
func sanitizeParams(params *encoderParams) {
	quality := brotli_max_int(minQuality, params.quality)
	params.quality = brotli_min_int(maxQuality, quality)

	if params.quality <= maxQualityForStaticEntropyCodes {
		params.large_window = false
	}

	if params.lgwin < minWindowBits {
		params.lgwin = minWindowBits
		return
	}

	/* The upper bound depends on whether large-window mode survived above. */
	var upper int = maxWindowBits
	if params.large_window {
		upper = largeMaxWindowBits
	}
	if params.lgwin > uint(upper) {
		params.lgwin = uint(upper)
	}
}
/* computeLgBlock returns the lg_block value to use for params.

   - The two fast qualities use lgwin.
   - Other qualities below minQualityForBlockSplit use 14.
   - An unset (zero) lgblock defaults to 16, raised to min(18, lgwin) for
     quality 9 and above when lgwin exceeds 16.
   - An explicit lgblock is limited to [minInputBlockBits, maxInputBlockBits]. */
func computeLgBlock(params *encoderParams) int {
	switch {
	case params.quality == fastOnePassCompressionQuality || params.quality == fastTwoPassCompressionQuality:
		return int(params.lgwin)
	case params.quality < minQualityForBlockSplit:
		return 14
	case params.lgblock == 0:
		if params.quality >= 9 && params.lgwin > 16 {
			return brotli_min_int(18, int(params.lgwin))
		}
		return 16
	default:
		return brotli_min_int(maxInputBlockBits, brotli_max_int(minInputBlockBits, params.lgblock))
	}
}
/* computeRbBits returns log2 of the size of the main ring buffer area.

   The result is one more than the larger of lgwin and lgblock:
   - at least lgwin + 1 bits, so that a newly added block fits completely
     while lgwin bits of history remain available;
   - at least lgblock + 1 bits, because the copy tail length needs to be
     smaller than the ring-buffer size. */
func computeRbBits(params *encoderParams) int {
	widest := brotli_max_int(int(params.lgwin), params.lgblock)
	return widest + 1
}
/* maxMetablockSize returns 1 << min(computeRbBits(params), maxInputBlockBits). */
func maxMetablockSize(params *encoderParams) uint {
	rbBits := computeRbBits(params)
	shift := uint(brotli_min_int(rbBits, maxInputBlockBits))
	return uint(1) << shift
}
/* literalSpreeLengthForSparseSearch returns the literal-run length used by
   the sparse-search heuristic: 64 below quality 9, 512 otherwise.

   When searching for backward references without having seen a match for a
   long time, some match lookups can be skipped. Unsuccessful match lookups
   are very expensive, and this heuristic speeds up compression quite a lot.
   At first, strides of 8 bytes are taken and every second byte is put into
   the hasher. After 4x more literals, the stride becomes 16 bytes and every
   fourth byte is put into the hasher.
   Applied only to qualities 2 to 9. */
func literalSpreeLengthForSparseSearch(params *encoderParams) uint {
	if params.quality >= 9 {
		return 512
	}
	return 64
}
/* chooseHasher fills hparams with the hasher type selected by the encoder
   quality, window size (lgwin) and size hint. For hasher types 5 and 6 it
   also sets block_bits, bucket_bits and num_last_distances_to_check (and
   hash_len for type 6). Fields not mentioned for a given type are left
   untouched. */
func chooseHasher(params *encoderParams, hparams *hasherParams) {
	quality := params.quality
	largeInput := params.size_hint >= 1<<20

	switch {
	case quality > 9:
		hparams.type_ = 10
	case quality == 4 && largeInput:
		hparams.type_ = 54
	case quality < 5:
		/* For the remaining low qualities the type number is the quality. */
		hparams.type_ = quality
	case params.lgwin <= 16:
		switch {
		case quality < 7:
			hparams.type_ = 40
		case quality < 9:
			hparams.type_ = 41
		default:
			hparams.type_ = 42
		}
	default:
		if largeInput && params.lgwin >= 19 {
			hparams.type_ = 6
			hparams.bucket_bits = 15
			hparams.hash_len = 5
		} else {
			hparams.type_ = 5
			if quality < 7 {
				hparams.bucket_bits = 14
			} else {
				hparams.bucket_bits = 15
			}
		}

		/* Tuning shared by types 5 and 6. */
		hparams.block_bits = quality - 1
		switch {
		case quality < 7:
			hparams.num_last_distances_to_check = 4
		case quality < 9:
			hparams.num_last_distances_to_check = 10
		default:
			hparams.num_last_distances_to_check = 16
		}
	}

	if params.lgwin <= 24 {
		return
	}

	/* Different hashers for large window brotli: not for qualities <= 2,
	   these are too fast for large window. Not for qualities >= 10: their
	   hasher already works well with large window. So the changes are:
	   H3  --> H35: for quality 3.
	   H54 --> H55: for quality 4 with size hint >= 1MB (1 << 20).
	   H6  --> H65: for qualities 5, 6, 7, 8, 9. */
	switch hparams.type_ {
	case 3:
		hparams.type_ = 35
	case 54:
		hparams.type_ = 55
	case 6:
		hparams.type_ = 65
	}
}
+108
View File
@@ -0,0 +1,108 @@
package brotli
import (
"errors"
"io"
)
// decodeError wraps an integer decoder error code so that it can be returned
// as an error value.
type decodeError int

// Error returns "brotli: " followed by the text that decoderErrorString
// yields for the wrapped code.
func (err decodeError) Error() string {
	code := int(err)
	return "brotli: " + string(decoderErrorString(code))
}
var errExcessiveInput = errors.New("brotli: excessive input")
var errInvalidState = errors.New("brotli: invalid state")
// readBufSize is a "good" buffer size that avoids excessive round-trips
// between C and Go but doesn't waste too much memory on buffering.
// It is arbitrarily chosen to be equal to the constant used in io.Copy.
const readBufSize = 32 * 1024
// NewReader returns a freshly allocated Reader that has been Reset to read
// from src.
func NewReader(src io.Reader) *Reader {
	var r Reader
	r.Reset(src)
	return &r
}
// Reset discards the Reader's state and makes it equivalent to the result of
// its original state from NewReader, but reading from src instead.
// This permits reusing a Reader rather than allocating a new one.
//
// Any input that was read from the previous source but not yet decoded is
// dropped, so the first Read after Reset always starts with data from src.
// The scratch buffer is kept and reused.
//
// The returned error is always nil.
func (r *Reader) Reset(src io.Reader) error {
	if r.error_code < 0 {
		// There was an unrecoverable error, leaving the Reader's state
		// undefined. Clear out everything but the buffer.
		*r = Reader{buf: r.buf}
	}

	decoderStateInit(r)
	r.src = src

	// decoderStateInit does not touch r.in. Without clearing it here, bytes
	// left over from the previous source (for example after errExcessiveInput,
	// or when a stream was abandoned part-way) would be decoded as the start
	// of the new stream, because Read only pulls from src when r.in is empty.
	r.in = nil

	if r.buf == nil {
		r.buf = make([]byte, readBufSize)
	}
	return nil
}
// Read implements io.Reader: it decodes brotli data into p and returns the
// number of bytes written to p.
//
// Errors produced here:
//   - errExcessiveInput: the decoder reported success but input is left over.
//   - decodeError: the decoder reported an error.
//   - io.ErrShortBuffer: the decoder wants more output space, yet nothing was
//     written to p.
//   - errInvalidState: the decoder asked for more input while r.in is not empty.
//   - io.ErrUnexpectedEOF: src returned io.EOF (with no data) while the decoder
//     still needed input.
// Any other error from src is passed through unchanged when src returns no data.
func (r *Reader) Read(p []byte) (n int, err error) {
// Only pull from src up front when the decoder has no pending output and
// there is no buffered input left to decode.
if !decoderHasMoreOutput(r) && len(r.in) == 0 {
m, readErr := r.src.Read(r.buf)
if m == 0 {
// If readErr is `nil`, we just proxy underlying stream behavior.
return 0, readErr
}
r.in = r.buf[:m]
}
// Note that the empty-p shortcut comes after the initial fill above.
if len(p) == 0 {
return 0, nil
}
for {
var written uint
in_len := uint(len(r.in))
out_len := uint(len(p))
in_remaining := in_len
out_remaining := out_len
// NOTE(review): r.in and p are passed by pointer; presumably the decoder
// advances both past what it consumed/produced. The len(r.in) checks
// below rely on that - confirm against decoderDecompressStream.
result := decoderDecompressStream(r, &in_remaining, &r.in, &out_remaining, &p)
// Bytes produced by this call; in_remaining is not read again.
written = out_len - out_remaining
n = int(written)
switch result {
case decoderResultSuccess:
if len(r.in) > 0 {
return n, errExcessiveInput
}
return n, nil
case decoderResultError:
return n, decodeError(decoderGetErrorCode(r))
case decoderResultNeedsMoreOutput:
if n == 0 {
return 0, io.ErrShortBuffer
}
return n, nil
case decoderNeedsMoreInput:
// Fall out of the switch and refill the input below.
}
if len(r.in) != 0 {
return 0, errInvalidState
}
// Calling r.src.Read may block. Don't block if we have data to return.
if n > 0 {
return n, nil
}
// Top off the buffer.
// (err declared here shadows the named result; every return is explicit.)
encN, err := r.src.Read(r.buf)
if encN == 0 {
// Not enough data to complete decoding.
if err == io.EOF {
return 0, io.ErrUnexpectedEOF
}
return 0, err
}
r.in = r.buf[:encN]
}
}
+134
View File
@@ -0,0 +1,134 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* A ringBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
`position() % (1 << window_bits)'.
For convenience, the ringBuffer array contains another copy of the
first `1 << tail_bits' bytes:
buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
and another copy of the last two bytes:
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
type ringBuffer struct {
/* 1 << window_bits; set by ringBufferSetup. */
size_ uint32
/* size_ - 1; ANDed with pos_ to obtain the offset inside buffer_. */
mask_ uint32
/* 1 << tail_bits, where tail_bits is params.lgblock; set by ringBufferSetup. */
tail_size_ uint32
/* size_ + tail_size_. */
total_size_ uint32
/* Length most recently passed to ringBufferInitBuffer (slack excluded). */
cur_size_ uint32
/* Write position. ringBufferWrite keeps the low 31 bits as the running
position and treats bit 31 as a "not the first lap" marker. */
pos_ uint32
/* Backing storage: 2 leading bytes, cur_size_ bytes, then
kSlackForEightByteHashingEverywhere slack bytes. */
data_ []byte
/* data_[2:], i.e. the storage without the two leading bytes. */
buffer_ []byte
}
/* ringBufferInit resets the write position of rb to zero. No other field is
touched and nothing is allocated here. */
func ringBufferInit(rb *ringBuffer) {
rb.pos_ = 0
}
/* ringBufferSetup derives the ring buffer geometry from the encoder params:

     size_       = 1 << computeRbBits(params)
     mask_       = size_ - 1
     tail_size_  = 1 << params.lgblock
     total_size_ = size_ + tail_size_

   It does not allocate; data_, buffer_, cur_size_ and pos_ are left as is. */
func ringBufferSetup(params *encoderParams, rb *ringBuffer) {
	windowBits := uint(computeRbBits(params))
	tailBits := uint(params.lgblock)

	/* Plain assignments: the fields are ordinary uint32 values, so no pointer
	   cast is needed to store into them. */
	rb.size_ = 1 << windowBits
	rb.mask_ = (1 << windowBits) - 1
	rb.tail_size_ = 1 << tailBits
	rb.total_size_ = rb.size_ + rb.tail_size_
}
const kSlackForEightByteHashingEverywhere uint = 7
/* ringBufferInitBuffer allocates or re-allocates data_ so that it holds
   buflen bytes plus a slack region before (2 bytes) and after
   (kSlackForEightByteHashingEverywhere bytes) them. Existing storage is
   reused when its capacity suffices; previously stored content is copied
   over. Both slack regions are filled with zeros. */
func ringBufferInitBuffer(buflen uint32, rb *ringBuffer) {
	needed := 2 + int(buflen) + int(kSlackForEightByteHashingEverywhere)

	var storage []byte
	if cap(rb.data_) >= needed {
		storage = rb.data_[:needed]
	} else {
		storage = make([]byte, needed)
	}

	/* Carry over what was stored so far (old size, including both slacks). */
	if rb.data_ != nil {
		copy(storage, rb.data_[:2+rb.cur_size_+uint32(kSlackForEightByteHashingEverywhere)])
	}

	rb.data_ = storage
	rb.cur_size_ = buflen
	rb.buffer_ = rb.data_[2:]

	/* Leading slack. */
	rb.data_[0], rb.data_[1] = 0, 0

	/* Trailing slack. */
	for k := uint32(0); k < uint32(kSlackForEightByteHashingEverywhere); k++ {
		rb.buffer_[rb.cur_size_+k] = 0
	}
}
/* ringBufferWriteTail mirrors the start of a write into the tail area that
   follows the first size_ bytes of buffer_. It only acts when the masked
   write position lies within the first tail_size_ bytes, and copies at most
   as many bytes as remain of the tail from that position. */
func ringBufferWriteTail(bytes []byte, n uint, rb *ringBuffer) {
	offset := rb.pos_ & rb.mask_
	if offset >= rb.tail_size_ {
		return
	}

	/* Just fill the tail buffer with the beginning data. */
	dst := uint(rb.size_ + offset)
	room := uint(rb.tail_size_ - offset)
	copy(rb.buffer_[dst:], bytes[:brotli_min_size_t(n, room)])
}
/* ringBufferWrite pushes the first n bytes of bytes into the ring buffer at
the current position, allocating the storage lazily, and then advances
pos_ by n. */
func ringBufferWrite(bytes []byte, n uint, rb *ringBuffer) {
if rb.pos_ == 0 && uint32(n) < rb.tail_size_ {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ring-buffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
rb.pos_ = uint32(n)
ringBufferInitBuffer(rb.pos_, rb)
copy(rb.buffer_, bytes[:n])
return
}
if rb.cur_size_ < rb.total_size_ {
/* Lazily allocate the full buffer. */
ringBufferInitBuffer(rb.total_size_, rb)
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
rb.buffer_[rb.size_-2] = 0
rb.buffer_[rb.size_-1] = 0
}
{
var masked_pos uint = uint(rb.pos_ & rb.mask_)
/* The length of the writes is limited so that we do not need to worry
about a write
NOTE(review): the sentence above is truncated in the original; it
presumably refers to a single write wrapping around the buffer more
than once - confirm against the callers. */
ringBufferWriteTail(bytes, n, rb)
if uint32(masked_pos+n) <= rb.size_ {
/* A single write fits. */
copy(rb.buffer_[masked_pos:], bytes[:n])
} else {
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
copy(rb.buffer_[masked_pos:], bytes[:brotli_min_size_t(n, uint(rb.total_size_-uint32(masked_pos)))])
/* Copy into the beginning of the buffer */
copy(rb.buffer_, bytes[rb.size_-uint32(masked_pos):][:uint32(n)-(rb.size_-uint32(masked_pos))])
}
}
{
/* Bit 31 of pos_ is the "not the first lap" marker; the low 31 bits hold
the running position. */
var not_first_lap bool = rb.pos_&(1<<31) != 0
var rb_pos_mask uint32 = (1 << 31) - 1
/* Mirror the last two bytes of the window into the two bytes that sit
in front of buffer_ (data_[0] and data_[1]). */
rb.data_[0] = rb.buffer_[rb.size_-2]
rb.data_[1] = rb.buffer_[rb.size_-1]
/* Both operands are below 1<<31, so a carry out of the low 31 bits lands
in bit 31 and thereby sets the marker for the first time. */
rb.pos_ = (rb.pos_ & rb_pos_mask) + uint32(uint32(n)&rb_pos_mask)
if not_first_lap {
/* Wrap, but preserve not-a-first-lap feature. */
rb.pos_ |= 1 << 31
}
}
}
+294
View File
@@ -0,0 +1,294 @@
package brotli
import "io"
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Brotli state for partial streaming decoding. */
/* Values of Reader.state. decoderStateInit starts the decoder at
stateUninited. The numeric values follow declaration order (iota). */
const (
stateUninited = iota
stateLargeWindowBits
stateInitialize
stateMetablockBegin
stateMetablockHeader
stateMetablockHeader2
stateContextModes
stateCommandBegin
stateCommandInner
stateCommandPostDecodeLiterals
stateCommandPostWrapCopy
stateUncompressed
stateMetadata
stateCommandInnerWrite
stateMetablockDone
stateCommandPostWrite1
stateCommandPostWrite2
stateHuffmanCode0
stateHuffmanCode1
stateHuffmanCode2
stateHuffmanCode3
stateContextMap1
stateContextMap2
stateTreeGroup
stateDone
)
/* Values of Reader.substate_metablock_header; initial value is
stateMetablockHeaderNone. */
const (
stateMetablockHeaderNone = iota
stateMetablockHeaderEmpty
stateMetablockHeaderNibbles
stateMetablockHeaderSize
stateMetablockHeaderUncompressed
stateMetablockHeaderReserved
stateMetablockHeaderBytes
stateMetablockHeaderMetadata
)
/* Values of Reader.substate_uncompressed; initial value is
stateUncompressedNone. */
const (
stateUncompressedNone = iota
stateUncompressedWrite
)
/* Values of Reader.substate_tree_group; initial value is stateTreeGroupNone. */
const (
stateTreeGroupNone = iota
stateTreeGroupLoop
)
/* Values of Reader.substate_context_map; initial value is
stateContextMapNone. */
const (
stateContextMapNone = iota
stateContextMapReadPrefix
stateContextMapHuffman
stateContextMapDecode
stateContextMapTransform
)
/* Values of Reader.substate_huffman; initial value is stateHuffmanNone. */
const (
stateHuffmanNone = iota
stateHuffmanSimpleSize
stateHuffmanSimpleRead
stateHuffmanSimpleBuild
stateHuffmanComplex
stateHuffmanLengthSymbols
)
/* Values of Reader.substate_decode_uint8; initial value is
stateDecodeUint8None. */
const (
stateDecodeUint8None = iota
stateDecodeUint8Short
stateDecodeUint8Long
)
/* Values of Reader.substate_read_block_length; initial value is
stateReadBlockLengthNone. */
const (
stateReadBlockLengthNone = iota
stateReadBlockLengthSuffix
)
// Reader holds the complete state of a streaming brotli decoder together with
// the source it reads compressed data from. It is created by NewReader,
// re-initialised by Reset and consumed through Read.
type Reader struct {
src io.Reader
buf []byte // scratch space for reading from src
in []byte // current chunk to decode; usually aliases buf
// Main state-machine position (one of the stateXxx constants).
state int
loop_counter int
br bitReader
buffer struct {
u64 uint64
u8 [8]byte
}
buffer_length uint32
pos int
max_backward_distance int
max_distance int
ringbuffer_size int
ringbuffer_mask int
dist_rb_idx int
// Initialised to 16, 15, 11, 4 by decoderStateInit.
dist_rb [4]int
// 0 means no error; Reset treats a negative value as unrecoverable.
error_code int
sub_loop_counter uint32
ringbuffer []byte
ringbuffer_end []byte
htree_command []huffmanCode
context_lookup []byte
context_map_slice []byte
dist_context_map_slice []byte
literal_hgroup huffmanTreeGroup
insert_copy_hgroup huffmanTreeGroup
distance_hgroup huffmanTreeGroup
block_type_trees []huffmanCode
block_len_trees []huffmanCode
trivial_literal_context int
distance_context int
meta_block_remaining_len int
block_length_index uint32
// The three arrays below are reset at the start of every meta-block by
// decoderStateMetablockBegin.
block_length [3]uint32
num_block_types [3]uint32
block_type_rb [6]uint32
distance_postfix_bits uint32
num_direct_distance_codes uint32
distance_postfix_mask int
num_dist_htrees uint32
dist_context_map []byte
literal_htree []huffmanCode
dist_htree_index byte
repeat_code_len uint32
prev_code_len uint32
copy_length int
distance_code int
rb_roundtrips uint
partial_pos_out uint
symbol uint32
repeat uint32
space uint32
table [32]huffmanCode
// symbol_lists.storage is pointed at symbols_lists_array by decoderStateInit.
symbol_lists symbolList
symbols_lists_array [huffmanMaxCodeLength + 1 + numCommandSymbols]uint16
next_symbol [32]int
code_length_code_lengths [codeLengthCodes]byte
code_length_histo [16]uint16
htree_index int
next []huffmanCode
context_index uint32
max_run_length_prefix uint32
code uint32
context_map_table [huffmanMaxSize272]huffmanCode
// Sub-state machine positions; each starts at its corresponding ...None
// constant (see decoderStateInit).
substate_metablock_header int
substate_tree_group int
substate_context_map int
substate_uncompressed int
substate_huffman int
substate_decode_uint8 int
substate_read_block_length int
is_last_metablock uint
is_uncompressed uint
is_metadata uint
should_wrap_ringbuffer uint
canny_ringbuffer_allocation uint
large_window bool
size_nibbles uint
window_bits uint32
new_ringbuffer_size int
num_literal_htrees uint32
context_map []byte
context_modes []byte
// Set from getDictionary() and getTransforms() by decoderStateInit.
dictionary *dictionary
transforms *transforms
trivial_literal_contexts [8]uint32
}
/* decoderStateInit puts s into the state of a decoder that has not consumed
   any input yet. Fields that are not listed here (for example src, buf, in
   and ringbuffer) are left untouched. It always returns true. */
func decoderStateInit(s *Reader) bool {
	s.error_code = 0 /* BROTLI_DECODER_NO_ERROR */
	initBitReader(&s.br)

	/* Main state machine and every sub-state machine back to the start. */
	s.state = stateUninited
	s.substate_metablock_header = stateMetablockHeaderNone
	s.substate_tree_group = stateTreeGroupNone
	s.substate_context_map = stateContextMapNone
	s.substate_uncompressed = stateUncompressedNone
	s.substate_huffman = stateHuffmanNone
	s.substate_decode_uint8 = stateDecodeUint8None
	s.substate_read_block_length = stateReadBlockLengthNone

	/* Counters and positions. */
	s.buffer_length = 0
	s.loop_counter = 0
	s.sub_loop_counter = 0
	s.pos = 0
	s.rb_roundtrips = 0
	s.partial_pos_out = 0

	/* Stream flags. */
	s.large_window = false
	s.is_last_metablock = 0
	s.is_uncompressed = 0
	s.is_metadata = 0
	s.should_wrap_ringbuffer = 0
	s.canny_ringbuffer_allocation = 1

	/* Window and ring-buffer geometry. */
	s.window_bits = 0
	s.max_distance = 0
	s.ringbuffer_size = 0
	s.new_ringbuffer_size = 0
	s.ringbuffer_mask = 0

	/* Initial content of the distance ring buffer. */
	s.dist_rb = [4]int{16, 15, 11, 4}
	s.dist_rb_idx = 0

	/* Drop all tables built while decoding. */
	s.block_type_trees = nil
	s.block_len_trees = nil
	s.context_map = nil
	s.context_modes = nil
	s.dist_context_map = nil
	s.context_map_slice = nil
	s.dist_context_map_slice = nil
	s.literal_hgroup.codes = nil
	s.literal_hgroup.htrees = nil
	s.insert_copy_hgroup.codes = nil
	s.insert_copy_hgroup.htrees = nil
	s.distance_hgroup.codes = nil
	s.distance_hgroup.htrees = nil

	/* Symbol list view over the embedded array. */
	s.symbol_lists.storage = s.symbols_lists_array[:]
	s.symbol_lists.offset = huffmanMaxCodeLength + 1

	s.dictionary = getDictionary()
	s.transforms = getTransforms()
	return true
}
/* decoderStateMetablockBegin resets the per-metablock fields of s: the
   remaining length, the three block-category counters and ring buffers, and
   all context-map / Huffman-group references. */
func decoderStateMetablockBegin(s *Reader) {
	s.meta_block_remaining_len = 0

	/* Three block categories, each starting with one block type, a block
	   length of 1 << 24 and a (1, 0) pair in the block-type ring buffer. */
	for cat := 0; cat < 3; cat++ {
		s.block_length[cat] = 1 << 24
		s.num_block_types[cat] = 1
		s.block_type_rb[2*cat] = 1
		s.block_type_rb[2*cat+1] = 0
	}

	/* Drop everything that referred to the previous metablock. */
	s.context_map, s.context_modes, s.dist_context_map = nil, nil, nil
	s.context_map_slice, s.dist_context_map_slice = nil, nil
	s.literal_htree = nil
	s.context_lookup = nil
	s.dist_htree_index = 0

	s.literal_hgroup.codes, s.literal_hgroup.htrees = nil, nil
	s.insert_copy_hgroup.codes, s.insert_copy_hgroup.htrees = nil, nil
	s.distance_hgroup.codes, s.distance_hgroup.htrees = nil, nil
}
/* decoderStateCleanupAfterMetablock clears the context maps, context modes
   and the htrees slices of the three Huffman groups. The groups' codes slices
   are left untouched. */
func decoderStateCleanupAfterMetablock(s *Reader) {
	s.context_modes, s.context_map, s.dist_context_map = nil, nil, nil

	s.literal_hgroup.htrees = nil
	s.insert_copy_hgroup.htrees = nil
	s.distance_hgroup.htrees = nil
}
/* decoderHuffmanTreeGroupInit records the alphabet parameters in group and
   allocates its storage: one htrees entry per tree and ntrees lookup tables
   in codes, each sized from kMaxHuffmanTableSize for the given alphabet.
   The s argument is not used. Reports whether codes is non-nil. */
func decoderHuffmanTreeGroupInit(s *Reader, group *huffmanTreeGroup, alphabet_size uint32, max_symbol uint32, ntrees uint32) bool {
	/* The size table is indexed by the alphabet size in units of 32 symbols,
	   rounded up. */
	perTree := uint(kMaxHuffmanTableSize[(alphabet_size+31)>>5])

	group.alphabet_size = uint16(alphabet_size)
	group.max_symbol = uint16(max_symbol)
	group.num_htrees = uint16(ntrees)

	group.htrees = make([][]huffmanCode, ntrees)
	group.codes = make([]huffmanCode, uint(ntrees)*perTree)
	return group.codes != nil
}
+662
View File
@@ -0,0 +1,662 @@
package brotli
import "encoding/binary"
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Class to model the static dictionary. */
const maxStaticDictionaryMatchLen = 37
const kInvalidMatch uint32 = 0xFFFFFFF
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* hash maps the first four bytes of data (read little-endian) to a bucket
   index of kDictNumBits bits. binary.LittleEndian.Uint32 panics if data holds
   fewer than four bytes. */
func hash(data []byte) uint32 {
	first4 := binary.LittleEndian.Uint32(data)
	mixed := first4 * kDictHashMul32

	/* The multiplication mixes best into the high bits, so keep those. */
	return mixed >> uint(32-kDictNumBits)
}
/* addMatch packs distance and len_code as (distance << 5) + len_code and
   stores the result in matches[len] via brotli_min_uint32_t together with the
   value already there. */
func addMatch(distance uint, len uint, len_code uint, matches []uint32) {
	packed := uint32(distance<<5 + len_code)
	current := matches[len]
	matches[len] = brotli_min_uint32_t(current, packed)
}
/* dictMatchLength returns how many leading bytes of data agree with
   dictionary word number id of length len, looking at no more than
   min(len, maxlen) bytes. */
func dictMatchLength(dict *dictionary, data []byte, id uint, len uint, maxlen uint) uint {
	/* Words of equal length are stored back to back, so word id starts len*id
	   bytes after the first word of that length. */
	start := uint(dict.offsets_by_length[len]) + len*id
	word := dict.data[start:]
	limit := brotli_min_size_t(uint(len), maxlen)
	return findMatchLengthWithLimit(word, data, limit)
}
/* isMatch reports whether data starts with dictionary word w after applying
   w.transform: 0 compares the word as stored, 10 compares it with its first
   letter upper-cased, and any other value compares it with every ASCII
   lower-case letter upper-cased. Words longer than max_length never match. */
func isMatch(d *dictionary, w dictWord, data []byte, max_length uint) bool {
	wordLen := uint(w.len)
	if wordLen > max_length {
		return false
	}

	start := uint(d.offsets_by_length[w.len]) + wordLen*uint(w.idx)
	word := d.data[start:]

	switch w.transform {
	case 0:
		/* Match against base dictionary word. */
		return findMatchLengthWithLimit(word, data, wordLen) == wordLen

	case 10:
		/* Match against uppercase first transform.
		   Note that there are only ASCII uppercase words in the lookup table. */
		if word[0] < 'a' || word[0] > 'z' {
			return false
		}
		if word[0]^32 != data[0] {
			return false
		}
		return findMatchLengthWithLimit(word[1:], data[1:], uint(w.len)-1) == uint(w.len-1)

	default:
		/* Match against uppercase all transform.
		   Note that there are only ASCII uppercase words in the lookup table. */
		for k := uint(0); k < wordLen; k++ {
			expected := word[k]
			if expected >= 'a' && expected <= 'z' {
				expected ^= 32
			}
			if expected != data[k] {
				return false
			}
		}
		return true
	}
}
/* findAllStaticDictionaryMatches probes the static dictionary hash table with
   the bytes at the start of data and records every dictionary word, plain or
   combined with one of the transforms handled below, that matches.

   Each hit is recorded through addMatch, which packs (distance << 5) + len_code
   into matches[match length]. The distance passed here is id + k*n, where id is
   the word index, n is 1 << size_bits_by_length[l] for the word length l, and
   k is the transform number.

   min_length only bounds the OMIT_LAST_2..9 candidates; max_length bounds how
   many bytes of data a match may cover. Returns true if at least one match was
   recorded.

   NOTE(review): the first hash(data) reads four bytes without consulting
   max_length; presumably callers guarantee at least four readable bytes and
   max_length <= len(data) - confirm against the call sites. */
func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_length uint, max_length uint, matches []uint32) bool {
var has_found_match bool = false
{
/* A bucket value of 0 means there are no candidate words for this hash. */
var offset uint = uint(dict.buckets[hash(data)])
var end bool = offset == 0
for !end {
w := dict.dict_words[offset]
offset++
/* The low 5 bits of w.len hold the word length. */
var l uint = uint(w.len) & 0x1F
var n uint = uint(1) << dict.words.size_bits_by_length[l]
var id uint = uint(w.idx)
/* Bit 0x80 of w.len marks the last entry of this bucket. */
end = !(w.len&0x80 == 0)
w.len = byte(l)
if w.transform == 0 {
var matchlen uint = dictMatchLength(dict.words, data, id, l, max_length)
var s []byte
var minlen uint
var maxlen uint
var len uint
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
if matchlen == l {
addMatch(id, l, l, matches)
has_found_match = true
}
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
if matchlen >= l-1 {
addMatch(id+12*n, l-1, l, matches)
if l+2 < max_length && data[l-1] == 'i' && data[l] == 'n' && data[l+1] == 'g' && data[l+2] == ' ' {
addMatch(id+49*n, l+3, l, matches)
}
has_found_match = true
}
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
minlen = min_length
if l > 9 {
minlen = brotli_max_size_t(minlen, l-9)
}
maxlen = brotli_min_size_t(matchlen, l-2)
for len = minlen; len <= maxlen; len++ {
var cut uint = l - len
var transform_id uint = (cut << 2) + uint((dict.cutoffTransforms>>(cut*6))&0x3F)
addMatch(id+transform_id*n, uint(len), l, matches)
has_found_match = true
}
/* Suffix transforms need the whole word matched and at least 7 more
   bytes within max_length, because s[0..6] is inspected below. */
if matchlen < l || l+6 >= max_length {
continue
}
s = data[l:]
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
if s[0] == ' ' {
addMatch(id+n, l+1, l, matches)
if s[1] == 'a' {
if s[2] == ' ' {
addMatch(id+28*n, l+3, l, matches)
} else if s[2] == 's' {
if s[3] == ' ' {
addMatch(id+46*n, l+4, l, matches)
}
} else if s[2] == 't' {
if s[3] == ' ' {
addMatch(id+60*n, l+4, l, matches)
}
} else if s[2] == 'n' {
if s[3] == 'd' && s[4] == ' ' {
addMatch(id+10*n, l+5, l, matches)
}
}
} else if s[1] == 'b' {
if s[2] == 'y' && s[3] == ' ' {
addMatch(id+38*n, l+4, l, matches)
}
} else if s[1] == 'i' {
if s[2] == 'n' {
if s[3] == ' ' {
addMatch(id+16*n, l+4, l, matches)
}
} else if s[2] == 's' {
if s[3] == ' ' {
addMatch(id+47*n, l+4, l, matches)
}
}
} else if s[1] == 'f' {
if s[2] == 'o' {
if s[3] == 'r' && s[4] == ' ' {
addMatch(id+25*n, l+5, l, matches)
}
} else if s[2] == 'r' {
if s[3] == 'o' && s[4] == 'm' && s[5] == ' ' {
addMatch(id+37*n, l+6, l, matches)
}
}
} else if s[1] == 'o' {
if s[2] == 'f' {
if s[3] == ' ' {
addMatch(id+8*n, l+4, l, matches)
}
} else if s[2] == 'n' {
if s[3] == ' ' {
addMatch(id+45*n, l+4, l, matches)
}
}
} else if s[1] == 'n' {
if s[2] == 'o' && s[3] == 't' && s[4] == ' ' {
addMatch(id+80*n, l+5, l, matches)
}
} else if s[1] == 't' {
if s[2] == 'h' {
if s[3] == 'e' {
if s[4] == ' ' {
addMatch(id+5*n, l+5, l, matches)
}
} else if s[3] == 'a' {
if s[4] == 't' && s[5] == ' ' {
addMatch(id+29*n, l+6, l, matches)
}
}
} else if s[2] == 'o' {
if s[3] == ' ' {
addMatch(id+17*n, l+4, l, matches)
}
}
} else if s[1] == 'w' {
if s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ' {
addMatch(id+35*n, l+6, l, matches)
}
}
} else if s[0] == '"' {
addMatch(id+19*n, l+1, l, matches)
if s[1] == '>' {
addMatch(id+21*n, l+2, l, matches)
}
} else if s[0] == '.' {
addMatch(id+20*n, l+1, l, matches)
if s[1] == ' ' {
addMatch(id+31*n, l+2, l, matches)
if s[2] == 'T' && s[3] == 'h' {
if s[4] == 'e' {
if s[5] == ' ' {
addMatch(id+43*n, l+6, l, matches)
}
} else if s[4] == 'i' {
if s[5] == 's' && s[6] == ' ' {
addMatch(id+75*n, l+7, l, matches)
}
}
}
}
} else if s[0] == ',' {
addMatch(id+76*n, l+1, l, matches)
if s[1] == ' ' {
addMatch(id+14*n, l+2, l, matches)
}
} else if s[0] == '\n' {
addMatch(id+22*n, l+1, l, matches)
if s[1] == '\t' {
addMatch(id+50*n, l+2, l, matches)
}
} else if s[0] == ']' {
addMatch(id+24*n, l+1, l, matches)
} else if s[0] == '\'' {
addMatch(id+36*n, l+1, l, matches)
} else if s[0] == ':' {
addMatch(id+51*n, l+1, l, matches)
} else if s[0] == '(' {
addMatch(id+57*n, l+1, l, matches)
} else if s[0] == '=' {
if s[1] == '"' {
addMatch(id+70*n, l+2, l, matches)
} else if s[1] == '\'' {
addMatch(id+86*n, l+2, l, matches)
}
} else if s[0] == 'a' {
if s[1] == 'l' && s[2] == ' ' {
addMatch(id+84*n, l+3, l, matches)
}
} else if s[0] == 'e' {
if s[1] == 'd' {
if s[2] == ' ' {
addMatch(id+53*n, l+3, l, matches)
}
} else if s[1] == 'r' {
if s[2] == ' ' {
addMatch(id+82*n, l+3, l, matches)
}
} else if s[1] == 's' {
if s[2] == 't' && s[3] == ' ' {
addMatch(id+95*n, l+4, l, matches)
}
}
} else if s[0] == 'f' {
if s[1] == 'u' && s[2] == 'l' && s[3] == ' ' {
addMatch(id+90*n, l+4, l, matches)
}
} else if s[0] == 'i' {
if s[1] == 'v' {
if s[2] == 'e' && s[3] == ' ' {
addMatch(id+92*n, l+4, l, matches)
}
} else if s[1] == 'z' {
if s[2] == 'e' && s[3] == ' ' {
addMatch(id+100*n, l+4, l, matches)
}
}
} else if s[0] == 'l' {
if s[1] == 'e' {
if s[2] == 's' && s[3] == 's' && s[4] == ' ' {
addMatch(id+93*n, l+5, l, matches)
}
} else if s[1] == 'y' {
if s[2] == ' ' {
addMatch(id+61*n, l+3, l, matches)
}
}
} else if s[0] == 'o' {
if s[1] == 'u' && s[2] == 's' && s[3] == ' ' {
addMatch(id+106*n, l+4, l, matches)
}
}
} else {
var is_all_caps bool = (w.transform != transformUppercaseFirst)
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
transform. */
var s []byte
if !isMatch(dict.words, w, data, max_length) {
continue
}
/* Transform "" + kUppercase{First,All} + "" */
var tmp int
if is_all_caps {
tmp = 44
} else {
tmp = 9
}
addMatch(id+uint(tmp)*n, l, l, matches)
has_found_match = true
if l+1 >= max_length {
continue
}
/* Transforms "" + kUppercase{First,All} + <suffix> */
s = data[l:]
if s[0] == ' ' {
var tmp int
if is_all_caps {
tmp = 68
} else {
tmp = 4
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
} else if s[0] == '"' {
var tmp int
if is_all_caps {
tmp = 87
} else {
tmp = 66
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
if s[1] == '>' {
var tmp int
if is_all_caps {
tmp = 97
} else {
tmp = 69
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
}
} else if s[0] == '.' {
var tmp int
if is_all_caps {
tmp = 101
} else {
tmp = 79
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
if s[1] == ' ' {
var tmp int
if is_all_caps {
tmp = 114
} else {
tmp = 88
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
}
} else if s[0] == ',' {
var tmp int
if is_all_caps {
tmp = 112
} else {
tmp = 99
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
if s[1] == ' ' {
var tmp int
if is_all_caps {
tmp = 107
} else {
tmp = 58
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
}
} else if s[0] == '\'' {
var tmp int
if is_all_caps {
tmp = 94
} else {
tmp = 74
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
} else if s[0] == '(' {
var tmp int
if is_all_caps {
tmp = 113
} else {
tmp = 78
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
} else if s[0] == '=' {
if s[1] == '"' {
var tmp int
if is_all_caps {
tmp = 105
} else {
tmp = 104
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
} else if s[1] == '\'' {
var tmp int
if is_all_caps {
tmp = 116
} else {
tmp = 108
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
}
}
}
}
}
/* Transforms with prefixes " " and "." */
if max_length >= 5 && (data[0] == ' ' || data[0] == '.') {
var is_space bool = (data[0] == ' ')
var offset uint = uint(dict.buckets[hash(data[1:])])
var end bool = offset == 0
for !end {
w := dict.dict_words[offset]
offset++
var l uint = uint(w.len) & 0x1F
var n uint = uint(1) << dict.words.size_bits_by_length[l]
var id uint = uint(w.idx)
end = !(w.len&0x80 == 0)
w.len = byte(l)
if w.transform == 0 {
var s []byte
if !isMatch(dict.words, w, data[1:], max_length-1) {
continue
}
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
"." + BROTLI_TRANSFORM_IDENTITY + "" */
var tmp int
if is_space {
tmp = 6
} else {
tmp = 32
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
has_found_match = true
if l+2 >= max_length {
continue
}
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
*/
s = data[l+1:]
if s[0] == ' ' {
var tmp int
if is_space {
tmp = 2
} else {
tmp = 77
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
} else if s[0] == '(' {
var tmp int
if is_space {
tmp = 89
} else {
tmp = 67
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
} else if is_space {
if s[0] == ',' {
addMatch(id+103*n, l+2, l, matches)
if s[1] == ' ' {
addMatch(id+33*n, l+3, l, matches)
}
} else if s[0] == '.' {
addMatch(id+71*n, l+2, l, matches)
if s[1] == ' ' {
addMatch(id+52*n, l+3, l, matches)
}
} else if s[0] == '=' {
if s[1] == '"' {
addMatch(id+81*n, l+3, l, matches)
} else if s[1] == '\'' {
addMatch(id+98*n, l+3, l, matches)
}
}
}
} else if is_space {
var is_all_caps bool = (w.transform != transformUppercaseFirst)
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
transform. */
var s []byte
if !isMatch(dict.words, w, data[1:], max_length-1) {
continue
}
/* Transforms " " + kUppercase{First,All} + "" */
var tmp int
if is_all_caps {
tmp = 85
} else {
tmp = 30
}
addMatch(id+uint(tmp)*n, l+1, l, matches)
has_found_match = true
if l+2 >= max_length {
continue
}
/* Transforms " " + kUppercase{First,All} + <suffix> */
s = data[l+1:]
if s[0] == ' ' {
var tmp int
if is_all_caps {
tmp = 83
} else {
tmp = 15
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
} else if s[0] == ',' {
if !is_all_caps {
addMatch(id+109*n, l+2, l, matches)
}
if s[1] == ' ' {
var tmp int
if is_all_caps {
tmp = 111
} else {
tmp = 65
}
addMatch(id+uint(tmp)*n, l+3, l, matches)
}
} else if s[0] == '.' {
var tmp int
if is_all_caps {
tmp = 115
} else {
tmp = 96
}
addMatch(id+uint(tmp)*n, l+2, l, matches)
if s[1] == ' ' {
var tmp int
if is_all_caps {
tmp = 117
} else {
tmp = 91
}
addMatch(id+uint(tmp)*n, l+3, l, matches)
}
} else if s[0] == '=' {
if s[1] == '"' {
var tmp int
if is_all_caps {
tmp = 110
} else {
tmp = 118
}
addMatch(id+uint(tmp)*n, l+3, l, matches)
} else if s[1] == '\'' {
var tmp int
if is_all_caps {
tmp = 119
} else {
tmp = 120
}
addMatch(id+uint(tmp)*n, l+3, l, matches)
}
}
}
}
}
if max_length >= 6 {
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
if (data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || (data[0] == 0xC2 && data[1] == 0xA0) {
var offset uint = uint(dict.buckets[hash(data[2:])])
var end bool = offset == 0
for !end {
w := dict.dict_words[offset]
offset++
var l uint = uint(w.len) & 0x1F
var n uint = uint(1) << dict.words.size_bits_by_length[l]
var id uint = uint(w.idx)
end = !(w.len&0x80 == 0)
w.len = byte(l)
if w.transform == 0 && isMatch(dict.words, w, data[2:], max_length-2) {
if data[0] == 0xC2 {
addMatch(id+102*n, l+2, l, matches)
has_found_match = true
} else if l+2 < max_length && data[l+2] == ' ' {
/* Transform number by prefix: ", " -> 13, "e " -> 18, "s " -> 7. */
var t uint = 13
if data[0] == 'e' {
t = 18
} else if data[0] == 's' {
t = 7
}
addMatch(id+t*n, l+3, l, matches)
has_found_match = true
}
}
}
}
}
if max_length >= 9 {
/* Transforms with prefixes " the " and ".com/" */
if (data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/') {
var offset uint = uint(dict.buckets[hash(data[5:])])
var end bool = offset == 0
for !end {
w := dict.dict_words[offset]
offset++
var l uint = uint(w.len) & 0x1F
var n uint = uint(1) << dict.words.size_bits_by_length[l]
var id uint = uint(w.idx)
end = !(w.len&0x80 == 0)
w.len = byte(l)
if w.transform == 0 && isMatch(dict.words, w, data[5:], max_length-5) {
var tmp int
if data[0] == ' ' {
tmp = 41
} else {
tmp = 72
}
addMatch(id+uint(tmp)*n, l+5, l, matches)
has_found_match = true
if l+5 < max_length {
var s []byte = data[l+5:]
if data[0] == ' ' {
/* " the " + word + " of " and " the " + word + " of the " */
if l+8 < max_length && s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ' {
addMatch(id+62*n, l+9, l, matches)
if l+12 < max_length && s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ' {
addMatch(id+73*n, l+13, l, matches)
}
}
}
}
}
}
}
}
return has_found_match
}
File diff suppressed because it is too large Load Diff
+22
View File
@@ -0,0 +1,22 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Utilities for building Huffman decoding tables. */
/* symbolList is a view into a uint16 array whose logical index 0 sits at
   position offset of storage. symbolListGet and symbolListPut add offset to
   every index, so a negative index is valid as long as i+offset stays inside
   storage. */
type symbolList struct {
/* Backing slice shared by every copy of this symbolList value. */
storage []uint16
/* Added to each logical index before storage is accessed. */
offset int
}
/* symbolListGet returns the element at logical index i, i.e. the value stored
   at position i+sl.offset of the backing slice. */
func symbolListGet(sl symbolList, i int) uint16 {
	slot := sl.offset + i
	return sl.storage[slot]
}
/* symbolListPut stores val at logical index i. sl is passed by value, but the
   write goes through its slice and is therefore visible to every holder of
   the same backing array. */
func symbolListPut(sl symbolList, i int, val uint16) {
	slot := sl.offset + i
	sl.storage[slot] = val
}
+641
View File
@@ -0,0 +1,641 @@
package brotli
/* Transform type codes, numbered consecutively from 0. numTransformTypes is
   one past the last valid code. */
const (
	transformIdentity = iota /* 0 */
	transformOmitLast1
	transformOmitLast2
	transformOmitLast3
	transformOmitLast4
	transformOmitLast5
	transformOmitLast6
	transformOmitLast7
	transformOmitLast8
	transformOmitLast9
	transformUppercaseFirst /* 10 */
	transformUppercaseAll
	transformOmitFirst1 /* 12 */
	transformOmitFirst2
	transformOmitFirst3
	transformOmitFirst4
	transformOmitFirst5
	transformOmitFirst6
	transformOmitFirst7
	transformOmitFirst8
	transformOmitFirst9
	transformShiftFirst /* 21 */
	transformShiftAll
	numTransformTypes /* 23 */
)
/* Largest OMIT_LAST_n transform code; it sizes the cutOffTransforms table. */
const transformsMaxCutOff = transformOmitLast9
/* transforms describes a set of dictionary word transforms: a table of
   (prefix id, type, suffix id) triplets plus the strings those ids refer to. */
type transforms struct {
/* NOTE(review): presumably the byte length of prefix_suffix - confirm. */
prefix_suffix_size uint16
/* Concatenated prefix/suffix strings, each stored as one length byte
   followed by that many bytes. */
prefix_suffix []byte
/* Byte offset into prefix_suffix for each prefix/suffix id. */
prefix_suffix_map []uint16
/* NOTE(review): presumably the number of triplets in transforms - confirm. */
num_transforms uint32
/* Three bytes per transform: prefix id, transform type, suffix id. */
transforms []byte
/* Two bytes per transform, read little-endian as the shift parameter by
   transformDictionaryWord for the SHIFT_FIRST / SHIFT_ALL types. */
params []byte
/* NOTE(review): looks like a transform index per omit-last count
   (0..transformsMaxCutOff) - confirm against the users of this field. */
cutOffTransforms [transformsMaxCutOff + 1]int16
}
/* transformPrefixId returns the prefix id of transform I, the first byte of
   its triplet. */
func transformPrefixId(t *transforms, I int) byte {
	triplet := I * 3
	return t.transforms[triplet]
}
/* transformType returns the transform type code of transform I, the second
   byte of its triplet. */
func transformType(t *transforms, I int) byte {
	triplet := I * 3
	return t.transforms[triplet+1]
}
/* transformSuffixId returns the suffix id of transform I, the third byte of
   its triplet. */
func transformSuffixId(t *transforms, I int) byte {
	triplet := I * 3
	return t.transforms[triplet+2]
}
/* transformPrefix returns the prefix string data of transform I: a slice of
   prefix_suffix starting at the prefix's length byte and running to the end
   of the table. */
func transformPrefix(t *transforms, I int) []byte {
	id := transformPrefixId(t, I)
	start := t.prefix_suffix_map[id]
	return t.prefix_suffix[start:]
}
/* transformSuffix returns the suffix string data of transform I: a slice of
   prefix_suffix starting at the suffix's length byte and running to the end
   of the table. */
func transformSuffix(t *transforms, I int) []byte {
	id := transformSuffixId(t, I)
	start := t.prefix_suffix_map[id]
	return t.prefix_suffix[start:]
}
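/* RFC 7932 transform prefix/suffix string data: a concatenation of
   length-prefixed strings (one length byte followed by that many bytes).
   kPrefixSuffixMap holds the byte offset at which each string starts. */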
const kPrefixSuffix string = "\001 \002, \010 of the \004 of \002s \001.\005 and \004 " + "in \001\"\004 to \002\">\001\n\002. \001]\005 for \003 a \006 " + "that \001'\006 with \006 from \004 by \001(\006. T" + "he \004 on \004 as \004 is \004ing \002\n\t\001:\003ed " + "\002=\"\004 at \003ly \001,\002='\005.com/\007. This \005" + " not \003er \003al \004ful \004ive \005less \004es" + "t \004ize \002\xc2\xa0\004ous \005 the \002e \000"
var kPrefixSuffixMap = [50]uint16{
0x00,
0x02,
0x05,
0x0E,
0x13,
0x16,
0x18,
0x1E,
0x23,
0x25,
0x2A,
0x2D,
0x2F,
0x32,
0x34,
0x3A,
0x3E,
0x45,
0x47,
0x4E,
0x55,
0x5A,
0x5C,
0x63,
0x68,
0x6D,
0x72,
0x77,
0x7A,
0x7C,
0x80,
0x83,
0x88,
0x8C,
0x8E,
0x91,
0x97,
0x9F,
0xA5,
0xA9,
0xAD,
0xB2,
0xB7,
0xBD,
0xC2,
0xC7,
0xCA,
0xCF,
0xD5,
0xD8,
}
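/* RFC 7932 transforms: three bytes per transform, in the order
   prefix id, transform type, suffix id. The ids index kPrefixSuffixMap. */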
var kTransformsData = []byte{
49,
transformIdentity,
49,
49,
transformIdentity,
0,
0,
transformIdentity,
0,
49,
transformOmitFirst1,
49,
49,
transformUppercaseFirst,
0,
49,
transformIdentity,
47,
0,
transformIdentity,
49,
4,
transformIdentity,
0,
49,
transformIdentity,
3,
49,
transformUppercaseFirst,
49,
49,
transformIdentity,
6,
49,
transformOmitFirst2,
49,
49,
transformOmitLast1,
49,
1,
transformIdentity,
0,
49,
transformIdentity,
1,
0,
transformUppercaseFirst,
0,
49,
transformIdentity,
7,
49,
transformIdentity,
9,
48,
transformIdentity,
0,
49,
transformIdentity,
8,
49,
transformIdentity,
5,
49,
transformIdentity,
10,
49,
transformIdentity,
11,
49,
transformOmitLast3,
49,
49,
transformIdentity,
13,
49,
transformIdentity,
14,
49,
transformOmitFirst3,
49,
49,
transformOmitLast2,
49,
49,
transformIdentity,
15,
49,
transformIdentity,
16,
0,
transformUppercaseFirst,
49,
49,
transformIdentity,
12,
5,
transformIdentity,
49,
0,
transformIdentity,
1,
49,
transformOmitFirst4,
49,
49,
transformIdentity,
18,
49,
transformIdentity,
17,
49,
transformIdentity,
19,
49,
transformIdentity,
20,
49,
transformOmitFirst5,
49,
49,
transformOmitFirst6,
49,
47,
transformIdentity,
49,
49,
transformOmitLast4,
49,
49,
transformIdentity,
22,
49,
transformUppercaseAll,
49,
49,
transformIdentity,
23,
49,
transformIdentity,
24,
49,
transformIdentity,
25,
49,
transformOmitLast7,
49,
49,
transformOmitLast1,
26,
49,
transformIdentity,
27,
49,
transformIdentity,
28,
0,
transformIdentity,
12,
49,
transformIdentity,
29,
49,
transformOmitFirst9,
49,
49,
transformOmitFirst7,
49,
49,
transformOmitLast6,
49,
49,
transformIdentity,
21,
49,
transformUppercaseFirst,
1,
49,
transformOmitLast8,
49,
49,
transformIdentity,
31,
49,
transformIdentity,
32,
47,
transformIdentity,
3,
49,
transformOmitLast5,
49,
49,
transformOmitLast9,
49,
0,
transformUppercaseFirst,
1,
49,
transformUppercaseFirst,
8,
5,
transformIdentity,
21,
49,
transformUppercaseAll,
0,
49,
transformUppercaseFirst,
10,
49,
transformIdentity,
30,
0,
transformIdentity,
5,
35,
transformIdentity,
49,
47,
transformIdentity,
2,
49,
transformUppercaseFirst,
17,
49,
transformIdentity,
36,
49,
transformIdentity,
33,
5,
transformIdentity,
0,
49,
transformUppercaseFirst,
21,
49,
transformUppercaseFirst,
5,
49,
transformIdentity,
37,
0,
transformIdentity,
30,
49,
transformIdentity,
38,
0,
transformUppercaseAll,
0,
49,
transformIdentity,
39,
0,
transformUppercaseAll,
49,
49,
transformIdentity,
34,
49,
transformUppercaseAll,
8,
49,
transformUppercaseFirst,
12,
0,
transformIdentity,
21,
49,
transformIdentity,
40,
0,
transformUppercaseFirst,
12,
49,
transformIdentity,
41,
49,
transformIdentity,
42,
49,
transformUppercaseAll,
17,
49,
transformIdentity,
43,
0,
transformUppercaseFirst,
5,
49,
transformUppercaseAll,
10,
0,
transformIdentity,
34,
49,
transformUppercaseFirst,
33,
49,
transformIdentity,
44,
49,
transformUppercaseAll,
5,
45,
transformIdentity,
49,
0,
transformIdentity,
33,
49,
transformUppercaseFirst,
30,
49,
transformUppercaseAll,
30,
49,
transformIdentity,
46,
49,
transformUppercaseAll,
1,
49,
transformUppercaseFirst,
34,
0,
transformUppercaseFirst,
33,
0,
transformUppercaseAll,
30,
0,
transformUppercaseAll,
1,
49,
transformUppercaseAll,
33,
49,
transformUppercaseAll,
21,
49,
transformUppercaseAll,
12,
0,
transformUppercaseAll,
5,
49,
transformUppercaseAll,
34,
0,
transformUppercaseAll,
12,
0,
transformUppercaseFirst,
30,
0,
transformUppercaseAll,
34,
0,
transformUppercaseFirst,
34,
}
/* kBrotliTransforms is the built-in transform set, assembled from the
   kPrefixSuffix, kPrefixSuffixMap and kTransformsData tables. */
var kBrotliTransforms = transforms{
	prefix_suffix_size: 217,
	prefix_suffix:      []byte(kPrefixSuffix),
	prefix_suffix_map:  kPrefixSuffixMap[:],
	num_transforms:     121,
	transforms:         kTransformsData,
	params:             nil, /* no extra parameters */
	cutOffTransforms:   [transformsMaxCutOff + 1]int16{0, 12, 27, 23, 42, 63, 56, 48, 59, 64},
}
/* getTransforms returns a pointer to the package-level kBrotliTransforms
   table. Every caller receives the same pointer, so the table is shared. */
func getTransforms() *transforms {
return &kBrotliTransforms
}
/* toUpperCase upper-cases the character at the start of p in place, using a
   simplified model keyed on the lead byte, and returns how many bytes that
   character occupies (1, 2 or 3). */
func toUpperCase(p []byte) int {
	switch lead := p[0]; {
	case lead < 0xC0:
		/* Single byte: only ASCII lower-case letters are changed. */
		if 'a' <= lead && lead <= 'z' {
			p[0] = lead ^ 32
		}
		return 1
	case lead < 0xE0:
		/* An overly simplified uppercasing model for UTF-8. */
		p[1] ^= 32
		return 2
	default:
		/* An arbitrary transform for three byte characters. */
		p[2] ^= 5
		return 3
	}
}
func shiftTransform(word []byte, word_len int, parameter uint16) int {
/* Limited sign extension: scalar < (1 << 24). */
var scalar uint32 = (uint32(parameter) & 0x7FFF) + (0x1000000 - (uint32(parameter) & 0x8000))
if word[0] < 0x80 {
/* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
scalar += uint32(word[0])
word[0] = byte(scalar & 0x7F)
return 1
} else if word[0] < 0xC0 {
/* Continuation / 10AAAAAA. */
return 1
} else if word[0] < 0xE0 {
/* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
if word_len < 2 {
return 1
}
scalar += uint32(word[1]&0x3F | (word[0]&0x1F)<<6)
word[0] = byte(0xC0 | (scalar>>6)&0x1F)
word[1] = byte(uint32(word[1]&0xC0) | scalar&0x3F)
return 2
} else if word[0] < 0xF0 {
/* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
if word_len < 3 {
return word_len
}
scalar += uint32(word[2])&0x3F | uint32(word[1]&0x3F)<<6 | uint32(word[0]&0x0F)<<12
word[0] = byte(0xE0 | (scalar>>12)&0x0F)
word[1] = byte(uint32(word[1]&0xC0) | (scalar>>6)&0x3F)
word[2] = byte(uint32(word[2]&0xC0) | scalar&0x3F)
return 3
} else if word[0] < 0xF8 {
/* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
if word_len < 4 {
return word_len
}
scalar += uint32(word[3])&0x3F | uint32(word[2]&0x3F)<<6 | uint32(word[1]&0x3F)<<12 | uint32(word[0]&0x07)<<18
word[0] = byte(0xF0 | (scalar>>18)&0x07)
word[1] = byte(uint32(word[1]&0xC0) | (scalar>>12)&0x3F)
word[2] = byte(uint32(word[2]&0xC0) | (scalar>>6)&0x3F)
word[3] = byte(uint32(word[3]&0xC0) | scalar&0x3F)
return 4
}
return 1
}
/* transformDictionaryWord writes prefix + transformed word + suffix for
   transform number transform_idx of trans into dst and returns the number of
   bytes written.

   word holds the dictionary word and len its length. Depending on the
   transform type the word is shortened at the end (OMIT_LAST_n), shortened at
   the front (OMIT_FIRST_n), upper-cased (first character or all), or shifted
   (first character or all, using the two-byte parameter in trans.params). */
func transformDictionaryWord(dst []byte, word []byte, len int, trans *transforms, transform_idx int) int {
	prefix := transformPrefix(trans, transform_idx)
	kind := int(transformType(trans, transform_idx))
	suffix := transformSuffix(trans, transform_idx)
	out := 0

	/* Prefix: one length byte followed by the bytes themselves. */
	for k, n := 1, int(prefix[0]); k <= n; k++ {
		dst[out] = prefix[k]
		out++
	}

	/* Trim the word. Identity (0) falls into the first case and trims
	   nothing. */
	switch {
	case kind <= transformOmitLast9:
		len -= kind
	case kind >= transformOmitFirst1 && kind <= transformOmitFirst9:
		skip := kind - (transformOmitFirst1 - 1)
		word = word[skip:]
		len -= skip
	}

	for k := 0; k < len; k++ {
		dst[out] = word[k]
		out++
	}

	/* Post-process the bytes just copied, which start at dst[out-len]. */
	switch kind {
	case transformUppercaseFirst:
		toUpperCase(dst[out-len:])
	case transformUppercaseAll:
		rest := dst[out-len:]
		for len > 0 {
			step := toUpperCase(rest)
			rest = rest[step:]
			len -= step
		}
	case transformShiftFirst:
		param := uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
		shiftTransform(dst[out-len:], int(len), param)
	case transformShiftAll:
		param := uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
		rest := dst[out-len:]
		for len > 0 {
			step := shiftTransform(rest, int(len), param)
			rest = rest[step:]
			len -= step
		}
	}

	/* Suffix: same layout as the prefix. */
	for k, n := 1, int(suffix[0]); k <= n; k++ {
		dst[out] = suffix[k]
		out++
	}
	return out
}
+70
View File
@@ -0,0 +1,70 @@
package brotli
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Heuristics for deciding about the UTF8-ness of strings. */
const kMinUTF8Ratio float64 = 0.75
/* parseAsUTF8 decodes one symbol from the start of input, looking at no more
   than size bytes, stores it in *symbol and returns the number of bytes used.

   A well-formed, shortest-form UTF-8 sequence of 1 to 4 bytes yields its code
   point. Anything else, including a NUL byte, an overlong or truncated
   sequence, or a value above 0x10FFFF, consumes a single byte and yields
   0x110000 | input[0], a value above the UTF-8 code space. */
func parseAsUTF8(symbol *int, input []byte, size uint) uint {
	lead := int(input[0])
	isTail := func(b byte) bool { return b&0xC0 == 0x80 }

	/* ASCII. A zero byte is not accepted here and ends up in the fallback. */
	if lead&0x80 == 0 && lead > 0 {
		*symbol = lead
		return 1
	}

	/* 2-byte UTF8 */
	if size > 1 && lead&0xE0 == 0xC0 && isTail(input[1]) {
		cp := (lead&0x1F)<<6 | int(input[1])&0x3F
		if cp > 0x7F {
			*symbol = cp
			return 2
		}
	}

	/* 3-byte UTF8 */
	if size > 2 && lead&0xF0 == 0xE0 && isTail(input[1]) && isTail(input[2]) {
		cp := (lead&0x0F)<<12 | (int(input[1])&0x3F)<<6 | int(input[2])&0x3F
		if cp > 0x7FF {
			*symbol = cp
			return 3
		}
	}

	/* 4-byte UTF8 */
	if size > 3 && lead&0xF8 == 0xF0 && isTail(input[1]) && isTail(input[2]) && isTail(input[3]) {
		cp := (lead&0x07)<<18 | (int(input[1])&0x3F)<<12 | (int(input[2])&0x3F)<<6 | int(input[3])&0x3F
		if cp > 0xFFFF && cp <= 0x10FFFF {
			*symbol = cp
			return 4
		}
	}

	/* Not UTF8, emit a special symbol above the UTF8-code space */
	*symbol = 0x110000 | lead
	return 1
}
/* isMostlyUTF8 reports whether more than min_fraction of the length bytes
   that start at pos decode as UTF-8. Each read position is reduced with mask
   before indexing data, so data is addressed as a ring buffer. */
func isMostlyUTF8(data []byte, pos uint, mask uint, length uint, min_fraction float64) bool {
	var utf8Bytes uint
	for off := uint(0); off < length; {
		var sym int
		used := parseAsUTF8(&sym, data[(pos+off)&mask:], length-off)
		off += used
		/* Symbols at or above 0x110000 mark bytes that were not UTF-8. */
		if sym < 0x110000 {
			utf8Bytes += used
		}
	}
	return float64(utf8Bytes) > min_fraction*float64(length)
}
+7
View File
@@ -0,0 +1,7 @@
package brotli
// assert panics with "assertion failure" when cond is false and does nothing
// otherwise.
func assert(cond bool) {
	if cond {
		return
	}
	panic("assertion failure")
}
+52
View File
@@ -0,0 +1,52 @@
package brotli
import "encoding/binary"
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
/* writeBits ORs the bits value into array at bit position *pos and then
   advances *pos by n_bits. Bits go into bytes at increasing addresses, least
   significant bit first within each byte.

   Example: assume 3 bits (Rs below) have been written already:

   BYTE-0     BYTE+1       BYTE+2
   0000 0RRR    0000 0000    0000 0000

   Up to 5 more bits fit into BYTE-0 by shifting them left by 3 and OR'ing;
   for longer values the remaining bits land in BYTE+1, BYTE+2, and so on.

   The call stores 8 bytes starting at the current byte, so array must extend
   at least that far. Only the current byte is read back; the following 7
   bytes are overwritten. */
func writeBits(n_bits uint, bits uint64, pos *uint, array []byte) {
	/* This branch of the code can write up to 56 bits at a time,
	   7 bits are lost by being perhaps already in *p and at least
	   1 bit is needed to initialize the bit-stream ahead (i.e. if 7
	   bits are in *p and we write 57 bits, then the next write will
	   access a byte that was never initialized). */
	byteIndex, bitShift := *pos>>3, *pos&7
	window := array[byteIndex:]
	merged := uint64(window[0]) | bits<<bitShift
	binary.LittleEndian.PutUint64(window, merged)
	*pos += n_bits
}
/* writeSingleBit appends one bit to array at *pos via writeBits: 1 when bit
   is true, 0 otherwise. */
func writeSingleBit(bit bool, pos *uint, array []byte) {
	var value uint64
	if bit {
		value = 1
	}
	writeBits(1, value, pos, array)
}
/* writeBitsPrepareStorage zeroes the byte of array that contains bit
   position pos. pos must be byte-aligned; otherwise assert panics. */
func writeBitsPrepareStorage(pos uint, array []byte) {
	aligned := pos&7 == 0
	assert(aligned)
	byteIndex := pos >> 3
	array[byteIndex] = 0
}
+162
View File
@@ -0,0 +1,162 @@
package brotli
import (
"errors"
"io"
"github.com/andybalholm/brotli/matchfinder"
)
// Compression levels for NewWriterLevel and WriterOptions.Quality.
// BestSpeed and BestCompression are the two ends of the 0 to 11 quality
// range; DefaultCompression is the level NewWriter uses.
const (
BestSpeed = 0
BestCompression = 11
DefaultCompression = 6
)
// WriterOptions configures Writer.
type WriterOptions struct {
// Quality controls the compression-speed vs compression-density trade-offs.
// The higher the quality, the slower the compression. Range is 0 to 11.
Quality int
// LGWin is the base 2 logarithm of the sliding window size.
// Range is 10 to 24. 0 indicates automatic configuration based on Quality.
// Reset copies LGWin into the encoder parameters only when it is greater
// than zero.
LGWin int
}
var (
// errEncode is returned by writeChunk when encoderCompressStream reports
// failure.
errEncode = errors.New("brotli: encode error")
// errWriterClosed is returned by writeChunk when the Writer has no
// destination (w.dst == nil), which is the state Close leaves it in.
errWriterClosed = errors.New("brotli: Writer is closed")
)
// NewWriter returns a Writer that compresses at DefaultCompression.
// Writes to the returned writer are compressed and written to dst.
// It is the caller's responsibility to call Close on the Writer when done.
// Writes may be buffered and not flushed until Close.
func NewWriter(dst io.Writer) *Writer {
	const level = DefaultCompression
	return NewWriterLevel(dst, level)
}
// NewWriterLevel is like NewWriter but takes the compression level as an
// argument instead of assuming DefaultCompression.
// The compression level can be DefaultCompression or any integer value between
// BestSpeed and BestCompression inclusive.
func NewWriterLevel(dst io.Writer, level int) *Writer {
	var opts WriterOptions
	opts.Quality = level
	return NewWriterOptions(dst, opts)
}
// NewWriterOptions is like NewWriter but takes the full WriterOptions. The
// options are stored on the Writer before Reset is called, so Reset applies
// them to the freshly initialised encoder state.
func NewWriterOptions(dst io.Writer, options WriterOptions) *Writer {
	w := &Writer{}
	w.options = options
	w.Reset(dst)
	return w
}
// Reset discards the Writer's state and makes it equivalent to the result of
// its original state from NewWriter or NewWriterLevel, but writing to dst
// instead. This permits reusing a Writer rather than allocating a new one.
// The options stored on the Writer are re-applied after the encoder state is
// reinitialised.
func (w *Writer) Reset(dst io.Writer) {
	encoderInitState(w)

	w.params.quality = w.options.Quality
	// A zero (or negative) LGWin keeps whatever encoderInitState chose.
	if lgwin := w.options.LGWin; lgwin > 0 {
		w.params.lgwin = uint(lgwin)
	}

	w.dst, w.err = dst, nil
}
// writeChunk feeds p to the encoder with operation op and returns the number
// of bytes of p that were consumed.
//
// It fails with errWriterClosed when the Writer has no destination, with the
// stored w.err when an earlier error is pending, and with errEncode when
// encoderCompressStream reports failure. Otherwise it loops until p is
// exhausted or w.err becomes non-nil, and returns w.err.
func (w *Writer) writeChunk(p []byte, op int) (n int, err error) {
	switch {
	case w.dst == nil:
		return 0, errWriterClosed
	case w.err != nil:
		return 0, w.err
	}

	for {
		remaining := uint(len(p))
		cursor := p
		ok := encoderCompressStream(w, op, &remaining, &cursor)

		// Drop whatever the encoder consumed in this round.
		consumed := len(p) - int(remaining)
		p = p[consumed:]
		n += consumed

		if !ok {
			return n, errEncode
		}
		if w.err != nil || len(p) == 0 {
			return n, w.err
		}
	}
}
// Flush outputs encoded data for all input provided to Write. The resulting
// output can be decoded to match all input before Flush, but the stream is
// not yet complete until after Close.
// Flush has a negative impact on compression.
func (w *Writer) Flush() error {
	var noInput []byte
	_, err := w.writeChunk(noInput, operationFlush)
	return err
}
// Close flushes remaining data to the decorated writer and then detaches the
// Writer from it. The destination is cleared even when finishing fails; any
// later write, flush or close reports errWriterClosed.
func (w *Writer) Close() error {
	// If stream is already closed, it is reported by `writeChunk`.
	var noInput []byte
	_, finishErr := w.writeChunk(noInput, operationFinish)
	w.dst = nil
	return finishErr
}
// Write implements io.Writer. Flush or Close must be called to ensure that the
// encoded bytes are actually flushed to the underlying Writer.
func (w *Writer) Write(p []byte) (n int, err error) {
	n, err = w.writeChunk(p, operationProcess)
	return n, err
}
// nopCloser wraps an io.Writer and adds a Close method that does nothing.
type nopCloser struct {
	io.Writer
}

// Close always succeeds and has no effect on the wrapped writer.
func (nopCloser) Close() error {
	return nil
}
// NewWriterV2 is like NewWriterLevel, but it uses the new implementation
// based on the matchfinder package. It currently supports up to level 7;
// any higher level is treated as level 7.
func NewWriterV2(dst io.Writer, level int) *matchfinder.Writer {
	// Hash-chain lengths for levels 2 through 6, indexed by level-2.
	chainLengths := [...]int{0, 1, 2, 4, 8}

	var finder matchfinder.MatchFinder
	switch {
	case level < 2:
		// Levels below 2 use M0; only level 1 enables lazy matching.
		finder = matchfinder.M0{Lazy: level == 1}
	default:
		// Level 7 and above fall back to the longest chain.
		chain := 64
		if level <= 6 {
			chain = chainLengths[level-2]
		}
		hash := 6
		if level >= 6 {
			hash = 5
		}
		finder = &matchfinder.M4{
			MaxDistance:     1 << 20,
			ChainLength:     chain,
			HashLen:         hash,
			DistanceBitCost: 57,
		}
	}

	return &matchfinder.Writer{
		Dest:        dst,
		MatchFinder: finder,
		Encoder:     &Encoder{},
		BlockSize:   1 << 16,
	}
}
+54
View File
@@ -0,0 +1,54 @@
*.o
*.swp
*.swm
*.swn
*.a
*.so
_obj
_test
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.exe~
*.test
*.prof
*.rar
*.zip
*.gz
*.psd
*.bmd
*.cfg
*.pptx
*.log
*nohup.out
*settings.pyc
*.sublime-project
*.sublime-workspace
.DS_Store
/.idea/
/.vscode/
/output/
/vendor/
/Gopkg.lock
/Gopkg.toml
coverage.html
coverage.out
coverage.xml
junit.xml
*.profile
*.svg
*.out
ast/test.out
ast/bench.sh
!testdata/*.json.gz
fuzz/testdata
*__debug_bin*
*pprof
*coverage.txt
+6
View File
@@ -0,0 +1,6 @@
[submodule "cloudwego"]
path = tools/asm2asm
url = https://github.com/cloudwego/asm2asm.git
[submodule "tools/simde"]
path = tools/simde
url = https://github.com/simd-everywhere/simde.git
+24
View File
@@ -0,0 +1,24 @@
header:
license:
spdx-id: Apache-2.0
copyright-owner: ByteDance Inc.
paths:
- '**/*.go'
- '**/*.s'
paths-ignore:
- 'ast/asm.s' # empty file
- 'decoder/asm.s' # empty file
- 'encoder/asm.s' # empty file
- 'internal/caching/asm.s' # empty file
- 'internal/jit/asm.s' # empty file
- 'internal/native/avx/native_amd64.s' # auto-generated by asm2asm
- 'internal/native/avx/native_subr_amd64.go' # auto-generated by asm2asm
- 'internal/native/avx2/native_amd64.s' # auto-generated by asm2asm
- 'internal/native/avx2/native_subr_amd64.go' # auto-generated by asm2asm
- 'internal/resolver/asm.s' # empty file
- 'internal/rt/asm.s' # empty file
- 'internal/loader/asm.s' # empty file
comment: on-failure
+128
View File
@@ -0,0 +1,128 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
wudi.daniel@bytedance.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
+63
View File
@@ -0,0 +1,63 @@
# How to Contribute
## Your First Pull Request
We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).
## Without Semantic Versioning
We keep the stable code in branch `main`, like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with the suffix `v2/v3` when the code has breaking changes.
## Branch Organization
We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development).
## Bugs
### 1. How to Find Known Issues
We are using [GitHub Issues](https://github.com/bytedance/sonic/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new task, try to make sure your problem doesn't already exist.
### 2. Reporting New Issues
Providing reduced test code is the recommended way to report issues. The code can be placed in:
- The issue itself
- [Golang Playground](https://play.golang.org/)
### 3. Security Bugs
Please do not report security bugs in public issues. Contact us by [Support Email](mailto:sonic@bytedance.com) instead.
## How to Get in Touch
- [Email](mailto:wudi.daniel@bytedance.com)
## Submit a Pull Request
Before you submit your Pull Request (PR) consider the following guidelines:
1. Search [GitHub](https://github.com/bytedance/sonic/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts.
2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work.
3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the bytedance/sonic repo.
4. In your forked repository, make your changes in a new git branch:
```
git checkout -b bugfix/security_bug develop
```
5. Create your patch, including appropriate test cases.
6. Follow our [Style Guides](#code-style-guides).
7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit).
Adherence to these conventions is necessary because release notes will be automatically generated from these messages.
8. Push your branch to GitHub:
```
git push origin bugfix/security_bug
```
9. In GitHub, send a pull request to `sonic:main`
Note: you must use one of `optimize/feature/bugfix/doc/ci/test/refactor` following a slash(`/`) as the branch prefix.
Your pr title and commit message should follow https://www.conventionalcommits.org/.
## Contribution Prerequisites
- Our development environment keeps up with [Go Official](https://golang.org/project/).
- You need to check your changes fully with lint tools before submitting your pull request: [gofmt](https://golang.org/pkg/cmd/gofmt/) & [golangci-lint](https://github.com/golangci/golangci-lint)
- You are familiar with [GitHub](https://github.com)
- You may need to be familiar with [Actions](https://github.com/features/actions) (our default workflow tool).
## Code Style Guides
See [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments).
Good resources:
- [Effective Go](https://golang.org/doc/effective_go)
- [Pingcap General advice](https://pingcap.github.io/style-guide/general.html)
- [Uber Go Style Guide](https://github.com/uber-go/guide/blob/master/style.md)
View File
+201
View File
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+487
View File
@@ -0,0 +1,487 @@
# Sonic
English | [中文](README_ZH_CN.md)
A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data).
## Requirement
- Go: 1.17~1.23
- OS: Linux / MacOS / Windows
- CPU: AMD64 / ARM64(need go1.20 above)
## Features
- Runtime object binding without code generation
- Complete APIs for JSON value manipulation
- Fast, fast, fast!
## APIs
see [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
## Benchmarks
For **all sizes** of json and **all scenarios** of usage, **Sonic performs best**.
- [Medium](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13KB, 300+ key, 6 layers)
```powershell
goversion: 1.17.1
goos: darwin
goarch: amd64
cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op
BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op
BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op
BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op
BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op
BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op
BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op
BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op
BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op
BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op
BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 allocs/op
BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op
BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op
BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op
BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op
BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op
BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op
BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op
BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op
BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op
BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op
BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op
BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op
BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op
BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op
BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op
BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op
BenchmarkGetOne_Sonic-16 3276 ns/op 3975.78 MB/s 24 B/op 1 allocs/op
BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 allocs/op
BenchmarkGetOne_Parallel_Sonic-16 216.7 ns/op 60098.95 MB/s 24 B/op 1 allocs/op
BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op
BenchmarkSetOne_Sonic-16 9571 ns/op 1360.61 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op
BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op
BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op
BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op
BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op
BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op
```
- [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers)
![small benchmarks](./docs/imgs/bench-small.png)
- [Large](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635KB, 10000+ key, 6 layers)
![large benchmarks](./docs/imgs/bench-large.png)
See [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) for benchmark codes.
## How it works
See [INTRODUCTION.md](./docs/INTRODUCTION.md).
## Usage
### Marshal/Unmarshal
Default behaviors are mostly consistent with `encoding/json`, except HTML escaping form (see [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) and `SortKeys` feature (optional support see [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)) that is **NOT** in conformity to [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
```go
import "github.com/bytedance/sonic"
var data YourSchema
// Marshal
output, err := sonic.Marshal(&data)
// Unmarshal
err := sonic.Unmarshal(output, &data)
```
### Streaming IO
Sonic supports decoding json from `io.Reader` or encoding objects into `io.Writer`, aims at handling multiple values as well as reducing memory consumption.
- encoder
```go
var o1 = map[string]interface{}{
"a": "b",
}
var o2 = 1
var w = bytes.NewBuffer(nil)
var enc = sonic.ConfigDefault.NewEncoder(w)
enc.Encode(o1)
enc.Encode(o2)
fmt.Println(w.String())
// Output:
// {"a":"b"}
// 1
```
- decoder
```go
var o = map[string]interface{}{}
var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
var dec = sonic.ConfigDefault.NewDecoder(r)
dec.Decode(&o)
dec.Decode(&o)
fmt.Printf("%+v", o)
// Output:
// map[1:2 a:b]
```
### Use Number/Use Int64
```go
import "github.com/bytedance/sonic/decoder"
var input = `1`
var data interface{}
// default float64
dc := decoder.NewDecoder(input)
dc.Decode(&data) // data == float64(1)
// use json.Number
dc = decoder.NewDecoder(input)
dc.UseNumber()
dc.Decode(&data) // data == json.Number("1")
// use int64
dc = decoder.NewDecoder(input)
dc.UseInt64()
dc.Decode(&data) // data == int64(1)
root, err := sonic.GetFromString(input)
// Get json.Number
jn := root.Number()
jm := root.InterfaceUseNumber().(json.Number) // jn == jm
// Get float64
fn := root.Float64()
fm := root.Interface().(float64) // fn == fm
```
### Sort Keys
On account of the performance loss from sorting (roughly 10%), sonic doesn't enable this feature by default. If your component depends on it to work (like [zstd](https://github.com/facebook/zstd)), Use it like this:
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/encoder"
// Binding map only
m := map[string]interface{}{}
v, err := encoder.Encode(m, encoder.SortMapKeys)
// Or ast.Node.SortKeys() before marshal
root, err := sonic.Get(JSON)
err = root.SortKeys()
```
### Escape HTML
On account of the performance loss (roughly 15%), sonic doesn't enable this feature by default. You can use `encoder.EscapeHTML` option to open this feature (align with `encoding/json.HTMLEscape`).
```go
import "github.com/bytedance/sonic"
v := map[string]string{"&&":"<>"}
ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":"\u003c\u003e"}`
```
### Compact Format
Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except when marshaling `json.RawMessage` or `json.Marshaler`: sonic validates their output JSON but does **NOT** compact it, for performance reasons. We provide the option `encoder.CompactMarshaler` to add the compacting step.
### Print Error
If there is invalid syntax in the input JSON, sonic will return `decoder.SyntaxError`, which supports pretty-printing of the error position:
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
var data interface{}
err := sonic.UnmarshalString("[[[}]]", &data)
if err != nil {
/* One line by default */
println(err.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n"
/* Pretty print */
if e, ok := err.(decoder.SyntaxError); ok {
/*Syntax error at index 3: invalid char
[[[}]]
...^..
*/
print(e.Description())
} else if me, ok := err.(*decoder.MismatchTypeError); ok {
// decoder.MismatchTypeError is new to Sonic v1.6.0
print(me.Description())
}
}
```
#### Mismatched Types [Sonic v1.6.0]
If there is a **mismatch-typed** value for a given key, sonic will report `decoder.MismatchTypeError` (if there are many, it reports the last one), but will still skip the wrong value and keep decoding the rest of the JSON.
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
var data = struct{
A int
B int
}{}
err := UnmarshalString(`{"A":"1","B":1}`, &data)
println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
fmt.Printf("%+v", data) // {A:0 B:1}
```
### Ast.Node
Sonic/ast.Node is a completely self-contained AST for JSON. It implements serialization and deserialization both and provides robust APIs for obtaining and modification of generic data.
#### Get/Index
Search partial JSON by given paths, which must be non-negative integer or string, or nil
```go
import "github.com/bytedance/sonic"
input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
// no path, returns entire json
root, err := sonic.Get(input)
raw := root.Raw() // == string(input)
// multiple paths
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
**Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. And sonic also provides another API `IndexOrGet()` to underlying use offset as well as ensure the key is matched.
#### SearchOption
`Searcher` provides some options for user to meet different needs:
```go
opts := ast.SearchOption{ CopyReturn: true ... }
val, err := sonic.GetWithOptions(JSON, opts, "key")
```
- CopyReturn
Indicate the searcher to copy the result JSON string instead of refer from the input. This can help to reduce memory usage if you cache the results
- ConcurrentRead
Since `ast.Node` uses a `Lazy-Load` design, it doesn't support concurrent reads by default. If you want to read it concurrently, please specify this option.
- ValidateJSON
Indicate the searcher to validate the entire JSON. This option is enabled by default, which slow down the search speed a little.
#### Set/Unset
Modify the json content by Set()/Unset()
```go
import "github.com/bytedance/sonic"
// Set
exist, err := root.Set("key4", NewBool(true)) // exist == false
alias1 := root.Get("key4")
println(alias1.Valid()) // true
alias2 := root.Index(1)
println(alias1 == alias2) // true
// Unset
exist, err := root.UnsetByIndex(1) // exist == true
println(root.Get("key4").Check()) // "value not exist"
```
#### Serialize
To encode `ast.Node` as json, use `MarshalJSON()` or `json.Marshal()` (MUST pass the node's pointer)
```go
import (
"encoding/json"
"github.com/bytedance/sonic"
)
buf, err := root.MarshalJSON()
println(string(buf)) // {"key1":[{},{"key2":{"key3":[1,2,3]}}]}
exp, err := json.Marshal(&root) // WARN: use pointer
println(string(buf) == string(exp)) // true
```
#### APIs
- validation: `Check()`, `Error()`, `Valid()`, `Exist()`
- searching: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
- go-type casting: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
- go-type packing: `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()`
- iteration: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
- modification: `Set()`, `SetByIndex()`, `Add()`
### Ast.Visitor
Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` nor `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`). For example, you might have the following types which are like `interface{}` but are actually not `interface{}`:
```go
type UserNode interface {}
// the following types implement the UserNode interface.
type (
UserNull struct{}
UserBool struct{ Value bool }
UserInt64 struct{ Value int64 }
UserFloat64 struct{ Value float64 }
UserString struct{ Value string }
UserObject struct{ Value map[string]UserNode }
UserArray struct{ Value []UserNode }
)
```
Sonic provides the following API to return **the preorder traversal of a JSON AST**. The `ast.Visitor` is a SAX style interface which is used in some C++ JSON library. You should implement `ast.Visitor` by yourself and pass it to `ast.Preorder()` method. In your visitor you can make your custom types to represent JSON values. There may be an O(n) space container (such as stack) in your visitor to record the object / array hierarchy.
```go
func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
type Visitor interface {
OnNull() error
OnBool(v bool) error
OnString(v string) error
OnInt64(v int64, n json.Number) error
OnFloat64(v float64, n json.Number) error
OnObjectBegin(capacity int) error
OnObjectKey(key string) error
OnObjectEnd() error
OnArrayBegin(capacity int) error
OnArrayEnd() error
}
```
See [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) for detailed usage. We also implement a demo visitor for `UserNode` in [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go).
## Compatibility
Sonic **DOES NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. For developers who use sonic to build their applications in different environments, we have the following suggestions:
- Developing on **Mac M1**: Make sure you have Rosetta 2 installed on your machine, and set `GOARCH=amd64` when building your application. Rosetta 2 can automatically translate x86 binaries to arm64 binaries and run x86 applications on Mac M1.
- Developing on **Linux arm64**: You can install qemu and use the `qemu-x86_64 -cpu max` command to convert x86 binaries to arm64 binaries for applications built with sonic. The qemu can achieve a similar transfer effect to Rosetta 2 on Mac M1.
For developers who want to use sonic on Linux arm64 without qemu, or those who want to handle JSON strictly consistent with `encoding/json`, we provide some compatible APIs as `sonic.API`
- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options like `SortKeys=false` will be invalid.
- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It will fall back to `encoding/json`.
- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options will be invalid.
## Tips
### Pretouch
Since Sonic uses [golang-asm](https://github.com/twitchyliquid64/golang-asm) as a JIT assembler, which is NOT very suitable for runtime compiling, first-hit running of a huge schema may cause request-timeout or even process-OOM. For better stability, we advise **using `Pretouch()` for huge-schema or compact-memory applications** before `Marshal()/Unmarshal()`.
```go
import (
"reflect"
"github.com/bytedance/sonic"
"github.com/bytedance/sonic/option"
)
func init() {
var v HugeStruct
// For most large types (nesting depth <= option.DefaultMaxInlineDepth)
err := sonic.Pretouch(reflect.TypeOf(v))
// with more CompileOption...
err := sonic.Pretouch(reflect.TypeOf(v),
// If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
// you can set compile recursive loops in Pretouch for better stability in JIT.
option.WithCompileRecursiveDepth(loop),
// For a large nested struct, try to set a smaller depth to reduce compiling time.
option.WithCompileMaxInlineDepth(depth),
)
}
```
### Copy string
When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up.
- `Config.CopyString`/`decoder.CopyString()`: We provide the option for `Decode()` / `Unmarshal()` users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree.
- `GetFromStringNoCopy()`: For memory safety, `sonic.Get()` / `sonic.GetFromString()` now copies return JSON. If users want to get json more quickly and not care about memory usage, you can use `GetFromStringNoCopy()` to return a JSON directly referenced from source.
### Pass string or []byte?
For alignment to `encoding/json`, we provide API to pass `[]byte` as an argument, but the string-to-bytes copy is conducted at the same time considering safety, which may lose performance when the origin JSON is huge. Therefore, you can use `UnmarshalString()` and `GetFromString()` to pass a string, as long as your origin data is a string or **nocopy-cast** is safe for your []byte. We also provide API `MarshalString()` for convenient **nocopy-cast** of encoded JSON []byte, which is safe since sonic's output bytes is always duplicated and unique.
### Accelerate `encoding.TextMarshaler`
To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
### Better performance for generic data
In **fully-parsed** scenario, `Unmarshal()` performs better than `Get()`+`Node.Interface()`. But if you only have a part of the schema for specific json, you can combine `Get()` and `Unmarshal()` together:
```go
import "github.com/bytedance/sonic"
node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
var user User // your partial schema...
err = sonic.UnmarshalString(node.Raw(), &user)
```
Even if you don't have any schema, use `ast.Node` as the container of generic values instead of `map` or `interface`:
```go
import "github.com/bytedance/sonic"
root, err := sonic.GetFromString(_TwitterJson)
user := root.GetByPath("statuses", 3, "user") // === root.Get("status").Index(3).Get("user")
err = user.Check()
// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
go someFunc(user)
```
Why? Because `ast.Node` stores its children using `array`:
- `Array`'s performance is **much better** than `Map` when Inserting (Deserialize) and Scanning (Serialize) data;
- **Hashing** (`map[x]`) is not as efficient as **Indexing** (`array[x]`), which `ast.Node` can conduct on **both array and object**;
- Using `Interface()`/`Map()` means Sonic must parse all the underlying values, while `ast.Node` can parse them **on demand**.
**CAUTION:** `ast.Node` **DOESN'T** ensure concurrent security directly, due to its **lazy-load** design. However, you can call `Node.Load()`/`Node.LoadAll()` to achieve that, which may bring performance reduction while it still works faster than converting to `map` or `interface{}`
### Ast.Node or Ast.Visitor?
For generic data, `ast.Node` should be enough for your needs in most cases.
However, `ast.Node` is designed for partially processing JSON string. It has some special designs such as lazy-load which might not be suitable for directly parsing the whole JSON string like `Unmarshal()`. Although `ast.Node` is better than `map` or `interface{}`, it's also a kind of intermediate representation after all if your final types are customized and you have to convert the above types to your custom types after parsing.
For better performance, in the previous case `ast.Visitor` will be the better choice. It performs JSON decoding like `Unmarshal()` and you can directly use your final types to represent a JSON AST without any intermediate representations.
But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API.
### Buffer Size
Sonic uses memory pools in many places like `encoder.Encode`, `ast.Node.MarshalJSON` to improve performance, which may produce more memory usage (in-use) when server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let user control the behavior of memory pool. See [option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables) package.
## Community
Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem.
+485
View File
@@ -0,0 +1,485 @@
# Sonic
[English](README.md) | 中文
一个速度奇快的 JSON 序列化/反序列化库,由 JIT (即时编译)和 SIMD (单指令流多数据流)加速。
## 依赖
- Go: 1.17~1.23
- OS: Linux / MacOS / Windows
- CPU: AMD64 / ARM64(需要 Go1.20 以上)
## 接口
详见 [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
## 特色
- 运行时对象绑定,无需代码生成
- 完备的 JSON 操作 API
- 快,更快,还要更快!
## 基准测试
对于**所有大小**的 json 和**所有使用场景** **Sonic 表现均为最佳**
- [中型](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13kB, 300+ 键, 6 层)
```powershell
goversion: 1.17.1
goos: darwin
goarch: amd64
cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
BenchmarkEncoder_Generic_Sonic-16 32393 ns/op 402.40 MB/s 11965 B/op 4 allocs/op
BenchmarkEncoder_Generic_Sonic_Fast-16 21668 ns/op 601.57 MB/s 10940 B/op 4 allocs/op
BenchmarkEncoder_Generic_JsonIter-16 42168 ns/op 309.12 MB/s 14345 B/op 115 allocs/op
BenchmarkEncoder_Generic_GoJson-16 65189 ns/op 199.96 MB/s 23261 B/op 16 allocs/op
BenchmarkEncoder_Generic_StdLib-16 106322 ns/op 122.60 MB/s 49136 B/op 789 allocs/op
BenchmarkEncoder_Binding_Sonic-16 6269 ns/op 2079.26 MB/s 14173 B/op 4 allocs/op
BenchmarkEncoder_Binding_Sonic_Fast-16 5281 ns/op 2468.16 MB/s 12322 B/op 4 allocs/op
BenchmarkEncoder_Binding_JsonIter-16 20056 ns/op 649.93 MB/s 9488 B/op 2 allocs/op
BenchmarkEncoder_Binding_GoJson-16 8311 ns/op 1568.32 MB/s 9481 B/op 1 allocs/op
BenchmarkEncoder_Binding_StdLib-16 16448 ns/op 792.52 MB/s 9479 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic-16 6681 ns/op 1950.93 MB/s 12738 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16 4179 ns/op 3118.99 MB/s 10757 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Generic_JsonIter-16 9861 ns/op 1321.84 MB/s 14362 B/op 115 allocs/op
BenchmarkEncoder_Parallel_Generic_GoJson-16 18850 ns/op 691.52 MB/s 23278 B/op 16 allocs/op
BenchmarkEncoder_Parallel_Generic_StdLib-16 45902 ns/op 283.97 MB/s 49174 B/op 789 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic-16 1480 ns/op 8810.09 MB/s 13049 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16 1209 ns/op 10785.23 MB/s 11546 B/op 4 allocs/op
BenchmarkEncoder_Parallel_Binding_JsonIter-16 6170 ns/op 2112.58 MB/s 9504 B/op 2 allocs/op
BenchmarkEncoder_Parallel_Binding_GoJson-16 3321 ns/op 3925.52 MB/s 9496 B/op 1 allocs/op
BenchmarkEncoder_Parallel_Binding_StdLib-16 3739 ns/op 3486.49 MB/s 9480 B/op 1 allocs/op
BenchmarkDecoder_Generic_Sonic-16 66812 ns/op 195.10 MB/s 57602 B/op 723 allocs/op
BenchmarkDecoder_Generic_Sonic_Fast-16 54523 ns/op 239.07 MB/s 49786 B/op 313 allocs/op
BenchmarkDecoder_Generic_StdLib-16 124260 ns/op 104.90 MB/s 50869 B/op 772 allocs/op
BenchmarkDecoder_Generic_JsonIter-16 91274 ns/op 142.81 MB/s 55782 B/op 1068 allocs/op
BenchmarkDecoder_Generic_GoJson-16 88569 ns/op 147.17 MB/s 66367 B/op 973 allocs/op
BenchmarkDecoder_Binding_Sonic-16 32557 ns/op 400.38 MB/s 28302 B/op 137 allocs/op
BenchmarkDecoder_Binding_Sonic_Fast-16 28649 ns/op 455.00 MB/s 24999 B/op 34 allocs/op
BenchmarkDecoder_Binding_StdLib-16 111437 ns/op 116.97 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Binding_JsonIter-16 35090 ns/op 371.48 MB/s 14673 B/op 385 allocs/op
BenchmarkDecoder_Binding_GoJson-16 28738 ns/op 453.59 MB/s 22039 B/op 49 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic-16 12321 ns/op 1057.91 MB/s 57233 B/op 723 allocs/op
BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16 10644 ns/op 1224.64 MB/s 49362 B/op 313 allocs/op
BenchmarkDecoder_Parallel_Generic_StdLib-16 57587 ns/op 226.35 MB/s 50874 B/op 772 allocs/op
BenchmarkDecoder_Parallel_Generic_JsonIter-16 38666 ns/op 337.12 MB/s 55789 B/op 1068 allocs/op
BenchmarkDecoder_Parallel_Generic_GoJson-16 30259 ns/op 430.79 MB/s 66370 B/op 974 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic-16 5965 ns/op 2185.28 MB/s 27747 B/op 137 allocs/op
BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16 5170 ns/op 2521.31 MB/s 24715 B/op 34 allocs/op
BenchmarkDecoder_Parallel_Binding_StdLib-16 27582 ns/op 472.58 MB/s 10576 B/op 208 allocs/op
BenchmarkDecoder_Parallel_Binding_JsonIter-16 13571 ns/op 960.51 MB/s 14685 B/op 385 allocs/op
BenchmarkDecoder_Parallel_Binding_GoJson-16 10031 ns/op 1299.51 MB/s 22111 B/op 49 allocs/op
BenchmarkGetOne_Sonic-16 3276 ns/op 3975.78 MB/s 24 B/op 1 allocs/op
BenchmarkGetOne_Gjson-16 9431 ns/op 1380.81 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Jsoniter-16 51178 ns/op 254.46 MB/s 27936 B/op 647 allocs/op
BenchmarkGetOne_Parallel_Sonic-16 216.7 ns/op 60098.95 MB/s 24 B/op 1 allocs/op
BenchmarkGetOne_Parallel_Gjson-16 1076 ns/op 12098.62 MB/s 0 B/op 0 allocs/op
BenchmarkGetOne_Parallel_Jsoniter-16 17741 ns/op 734.06 MB/s 27945 B/op 647 allocs/op
BenchmarkSetOne_Sonic-16 9571 ns/op 1360.61 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Sjson-16 36456 ns/op 357.22 MB/s 52180 B/op 9 allocs/op
BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.86 MB/s 45862 B/op 964 allocs/op
BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op
BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op
BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op
BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op
```
- [小型](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 个键, 3 层)
![small benchmarks](./docs/imgs/bench-small.png)
- [大型](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635kB, 10000+ 个键, 6 层)
![large benchmarks](./docs/imgs/bench-large.png)
要查看基准测试代码,请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) 。
## 工作原理
请参阅 [INTRODUCTION_ZH_CN.md](./docs/INTRODUCTION_ZH_CN.md).
## 使用方式
### 序列化/反序列化
默认的行为基本上与 `encoding/json` 相一致,除了 HTML 转义形式(参见 [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) 和 `SortKeys` 功能(参见 [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys))**没有**遵循 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 。
```go
import "github.com/bytedance/sonic"
var data YourSchema
// Marshal
output, err := sonic.Marshal(&data)
// Unmarshal
err := sonic.Unmarshal(output, &data)
```
### 流式输入输出
Sonic 支持解码 `io.Reader` 中输入的 json,或将对象编码为 json 后输出至 `io.Writer`,以处理多个值并减少内存消耗。
- 编码器
```go
var o1 = map[string]interface{}{
"a": "b",
}
var o2 = 1
var w = bytes.NewBuffer(nil)
var enc = sonic.ConfigDefault.NewEncoder(w)
enc.Encode(o1)
enc.Encode(o2)
fmt.Println(w.String())
// Output:
// {"a":"b"}
// 1
```
- 解码器
```go
var o = map[string]interface{}{}
var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
var dec = sonic.ConfigDefault.NewDecoder(r)
dec.Decode(&o)
dec.Decode(&o)
fmt.Printf("%+v", o)
// Output:
// map[1:2 a:b]
```
### 使用 `Number` / `int64`
```go
import "github.com/bytedance/sonic/decoder"
var input = `1`
var data interface{}
// default float64
dc := decoder.NewDecoder(input)
dc.Decode(&data) // data == float64(1)
// use json.Number
dc = decoder.NewDecoder(input)
dc.UseNumber()
dc.Decode(&data) // data == json.Number("1")
// use int64
dc = decoder.NewDecoder(input)
dc.UseInt64()
dc.Decode(&data) // data == int64(1)
root, err := sonic.GetFromString(input)
// Get json.Number
jn := root.Number()
jm := root.InterfaceUseNumber().(json.Number) // jn == jm
// Get float64
fn := root.Float64()
fm := root.Interface().(float64) // fn == fm
```
### 对键排序
考虑到排序带来的性能损失(约 10% ), sonic 默认不会启用这个功能。如果你的组件依赖这个行为(如 [zstd](https://github.com/facebook/zstd)) ,可以仿照下面的例子:
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/encoder"
// Binding map only
m := map[string]interface{}{}
v, err := encoder.Encode(m, encoder.SortMapKeys)
// Or ast.Node.SortKeys() before marshal
root, err := sonic.Get(JSON)
err = root.SortKeys()
```
### HTML 转义
考虑到性能损失(约15%), sonic 默认不会启用这个功能。你可以使用 `encoder.EscapeHTML` 选项来开启(与 `encoding/json.HTMLEscape` 行为一致)。
```go
import "github.com/bytedance/sonic"
v := map[string]string{"&&":"<>"}
ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":"\u003c\u003e"}`
```
### 紧凑格式
Sonic 默认将基本类型( `struct` `map` 等)编码为紧凑格式的 JSON ,除非使用 `json.RawMessage` or `json.Marshaler` 进行编码: sonic 确保输出的 JSON 合法,但出于性能考虑,**不会**加工成紧凑格式。我们提供选项 `encoder.CompactMarshaler` 来添加此过程,
### 打印错误
如果输入的 JSON 存在无效的语法,sonic 将返回 `decoder.SyntaxError`,该错误支持错误位置的美化输出。
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
var data interface{}
err := sonic.UnmarshalString("[[[}]]", &data)
if err != nil {
/* One line by default */
println(err.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n"
/* Pretty print */
if e, ok := err.(decoder.SyntaxError); ok {
/*Syntax error at index 3: invalid char
[[[}]]
...^..
*/
print(e.Description())
} else if me, ok := err.(*decoder.MismatchTypeError); ok {
// decoder.MismatchTypeError is new to Sonic v1.6.0
print(me.Description())
}
}
```
#### 类型不匹配 [Sonic v1.6.0]
如果给定键中存在**类型不匹配**的值, sonic 会抛出 `decoder.MismatchTypeError` (如果有多个,只会报告最后一个),但仍会跳过错误的值并解码下一个 JSON 。
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
var data = struct{
A int
B int
}{}
err := UnmarshalString(`{"A":"1","B":1}`, &data)
println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
fmt.Printf("%+v", data) // {A:0 B:1}
```
### `Ast.Node`
Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改JSON数据的鲁棒的 API。
#### 查找/索引
通过给定的路径搜索 JSON 片段,路径必须为非负整数,字符串或 `nil`
```go
import "github.com/bytedance/sonic"
input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
// no path, returns entire json
root, err := sonic.Get(input)
raw := root.Raw() // == string(input)
// multiple paths
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
**注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API, `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。
#### 查找选项
`ast.Searcher`提供了一些选项,以满足用户的不同需求:
```
opts := ast.SearchOption{ CopyReturn: true ... }
val, err := sonic.GetWithOptions(JSON, opts, "key")
```
- CopyReturn
指示搜索器复制结果JSON字符串,而不是从输入引用。如果用户缓存结果,这有助于减少内存使用
- ConcurentRead
因为`ast.Node`使用`Lazy-Load`设计,默认不支持并发读取。如果您想同时读取,请指定它。
- ValidateJSON
指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。
#### 修改
使用 `Set()` / `Unset()` 修改 json 的内容
```go
import "github.com/bytedance/sonic"
// Set
exist, err := root.Set("key4", NewBool(true)) // exist == false
alias1 := root.Get("key4")
println(alias1.Valid()) // true
alias2 := root.Index(1)
println(alias1 == alias2) // true
// Unset
exist, err := root.UnsetByIndex(1) // exist == true
println(root.Get("key4").Check()) // "value not exist"
```
#### 序列化
要将 `ast.Node` 编码为 json ,使用 `MarshalJSON()` 或者 `json.Marshal()` (必须传递指向节点的指针)
```go
import (
"encoding/json"
"github.com/bytedance/sonic"
)
buf, err := root.MarshalJSON()
println(string(buf)) // {"key1":[{},{"key2":{"key3":[1,2,3]}}]}
exp, err := json.Marshal(&root) // WARN: use pointer
println(string(buf) == string(exp)) // true
```
#### APIs
- 合法性检查: `Check()`, `Error()`, `Valid()`, `Exist()`
- 索引: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
- 转换至 go 内置类型: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
- go 类型打包: `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()`
- 迭代: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
- 修改: `Set()`, `SetByIndex()`, `Add()`
### `Ast.Visitor`
Sonic 提供了一个高级的 API 用于直接全量解析 JSON 到非标准容器里 (既不是 `struct` 也不是 `map[string]interface{}`) 且不需要借助任何中间表示 (`ast.Node` 或 `interface{}`)。举个例子,你可能定义了下述的类型,它们看起来像 `interface{}`,但实际上并不是:
```go
type UserNode interface {}
// the following types implement the UserNode interface.
type (
UserNull struct{}
UserBool struct{ Value bool }
UserInt64 struct{ Value int64 }
UserFloat64 struct{ Value float64 }
UserString struct{ Value string }
UserObject struct{ Value map[string]UserNode }
UserArray struct{ Value []UserNode }
)
```
Sonic 提供了下述的 API 来返回 **“对 JSON AST 的前序遍历”**。`ast.Visitor` 是一个 SAX 风格的接口,这在某些 C++ 的 JSON 解析库中被使用到。你需要自己实现一个 `ast.Visitor`,将它传递给 `ast.Preorder()` 方法。在你的实现中你可以使用自定义的类型来表示 JSON 的值。在你的 `ast.Visitor` 中,可能需要有一个 O(n) 空间复杂度的容器(比如说栈)来记录 object / array 的层级。
```go
func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
type Visitor interface {
OnNull() error
OnBool(v bool) error
OnString(v string) error
OnInt64(v int64, n json.Number) error
OnFloat64(v float64, n json.Number) error
OnObjectBegin(capacity int) error
OnObjectKey(key string) error
OnObjectEnd() error
OnArrayBegin(capacity int) error
OnArrayEnd() error
}
```
详细用法参看 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go),我们还为 `UserNode` 实现了一个示例 `ast.Visitor`,你可以在 [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go) 中找到它。
## 兼容性
由于开发高性能代码的困难性, Sonic **不**保证对所有环境的支持。对于在不同环境中使用 Sonic 构建应用程序的开发者,我们有以下建议:
- 在 **Mac M1** 上开发:确保在您的计算机上安装了 Rosetta 2,并在构建时设置 `GOARCH=amd64` 。 Rosetta 2 可以自动将 x86 二进制文件转换为 arm64 二进制文件,并在 Mac M1 上运行 x86 应用程序。
- 在 **Linux arm64** 上开发:您可以安装 qemu 并使用 `qemu-x86_64 -cpu max` 命令来将 x86 二进制文件转换为 arm64 二进制文件。qemu可以实现与Mac M1上的Rosetta 2类似的转换效果。
对于希望在不使用 qemu 下使用 sonic 的开发者,或者希望处理 JSON 时与 `encoding/JSON` 严格保持一致的开发者,我们在 `sonic.API` 中提供了一些兼容性 API
- `ConfigDefault`: 在支持 sonic 的环境下 sonic 的默认配置(`EscapeHTML=false`,`SortKeys=false`等)。行为与具有相应配置的 `encoding/json` 一致,一些选项,如 `SortKeys=false` 将无效。
- `ConfigStd`: 在支持 sonic 的环境下与标准库兼容的配置(`EscapeHTML=true`,`SortKeys=true`等)。行为与 `encoding/json` 一致。
- `ConfigFastest`: 在支持 sonic 的环境下运行最快的配置(`NoQuoteTextMarshaler=true`)。行为与具有相应配置的 `encoding/json` 一致,某些选项将无效。
## 注意事项
### 预热
由于 Sonic 使用 [golang-asm](https://github.com/twitchyliquid64/golang-asm) 作为 JIT 汇编器,这个库并不适用于运行时编译,第一次运行一个大型模式可能会导致请求超时甚至进程内存溢出。为了更好地稳定性,我们建议在运行大型模式或在内存有限的应用中,在使用 `Marshal()/Unmarshal()` 前运行 `Pretouch()`
```go
import (
"reflect"
"github.com/bytedance/sonic"
"github.com/bytedance/sonic/option"
)
func init() {
var v HugeStruct
// For most large types (nesting depth <= option.DefaultMaxInlineDepth)
err := sonic.Pretouch(reflect.TypeOf(v))
// with more CompileOption...
err := sonic.Pretouch(reflect.TypeOf(v),
// If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
// you can set compile recursive loops in Pretouch for better stability in JIT.
option.WithCompileRecursiveDepth(loop),
// For a large nested struct, try to set a smaller depth to reduce compiling time.
option.WithCompileMaxInlineDepth(depth),
)
}
```
### 拷贝字符串
当解码 **没有转义字符的字符串**时, sonic 会从原始的 JSON 缓冲区内引用而不是复制到新的一个缓冲区中。这对 CPU 的性能方面很有帮助,但是可能因此在解码后对象仍在使用的时候将整个 JSON 缓冲区保留在内存中。实践中我们发现,通过引用 JSON 缓冲区引入的额外内存通常是解码后对象的 20% 至 80% ,一旦应用长期保留这些对象(如缓存以备重用),服务器所使用的内存可能会增加。我们提供了选项 `decoder.CopyString()` 供用户选择,不引用 JSON 缓冲区。这可能在一定程度上降低 CPU 性能。
### 传递字符串还是字节数组?
为了和 `encoding/json` 保持一致,我们提供了传递 `[]byte` 作为参数的 API ,但考虑到安全性,字符串到字节的复制是同时进行的,这在原始 JSON 非常大时可能会导致性能损失。因此,你可以使用 `UnmarshalString()` 和 `GetFromString()` 来传递字符串,只要你的原始数据是字符串,或**零拷贝类型转换**对于你的字节数组是安全的。我们也提供了 `MarshalString()` 的 API ,以便对编码的 JSON 字节数组进行**零拷贝类型转换**,因为 sonic 输出的字节始终是重复并且唯一的,所以这样是安全的。
### 加速 `encoding.TextMarshaler`
为了保证数据安全性, `sonic.Encoder` 默认会对来自 `encoding.TextMarshaler` 接口的字符串进行引用和转义,如果大部分数据都是这种形式那可能会导致很大的性能损失。我们提供了 `encoder.NoQuoteTextMarshaler` 选项来跳过这些操作,但你**必须**保证他们的输出字符串依照 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 进行了转义和引用。
### 泛型的性能优化
在 **完全解析**的场景下, `Unmarshal()` 表现得比 `Get()`+`Node.Interface()` 更好。但是如果你只有特定 JSON 的部分模式,你可以将 `Get()` 和 `Unmarshal()` 结合使用:
```go
import "github.com/bytedance/sonic"
node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
var user User // your partial schema...
err = sonic.UnmarshalString(node.Raw(), &user)
```
甚至如果你没有任何模式,可以用 `ast.Node` 代替 `map` 或 `interface` 作为泛型的容器:
```go
import "github.com/bytedance/sonic"
root, err := sonic.GetFromString(_TwitterJson)
user := root.GetByPath("statuses", 3, "user") // === root.Get("status").Index(3).Get("user")
err = user.Check()
// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
go someFunc(user)
```
为什么?因为 `ast.Node` 使用 `array` 来存储其子节点:
- 在插入(反序列化)和扫描(序列化)数据时,`Array` 的性能比 `Map` **好得多**
- **哈希**(`map[x]`)的效率不如**索引**(`array[x]`)高效,而 `ast.Node` 可以在数组和对象上使用索引;
- 使用 `Interface()` / `Map()` 意味着 sonic 必须解析所有的底层值,而 `ast.Node` 可以**按需解析**它们。
**注意**:由于 `ast.Node` 的惰性加载设计,其**不能**直接保证并发安全性,但你可以调用 `Node.Load()` / `Node.LoadAll()` 来实现并发安全。尽管可能会带来性能损失,但仍比转换成 `map` 或 `interface{}` 更为高效。
### 使用 `ast.Node` 还是 `ast.Visitor`
对于泛型数据的解析,`ast.Node` 在大多数场景上应该能够满足你的需求。
然而,`ast.Node` 是一种针对部分解析 JSON 而设计的泛型容器,它包含一些特殊设计,比如惰性加载,如果你希望像 `Unmarshal()` 那样直接解析整个 JSON,这些设计可能并不合适。尽管 `ast.Node` 相较于 `map` 或 `interface{}` 来说是更好的一种泛型容器,但它毕竟也是一种中间表示,如果你的最终类型是自定义的,你还得在解析完成后将上述类型转化成你自定义的类型。
在上述场景中,如果想要有更极致的性能,`ast.Visitor` 会是更好的选择。它采用和 `Unmarshal()` 类似的形式解析 JSON,并且你可以直接使用你的最终类型去表示 JSON AST,而不需要经过额外的任何中间表示。
但是,`ast.Visitor` 并不是一个很易用的 API。你可能需要写大量的代码去实现自己的 `ast.Visitor`,并且需要在解析过程中仔细维护树的层级。如果你决定要使用这个 API,请先仔细阅读 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) 中的注释。
### 缓冲区大小
Sonic在许多地方使用内存池,如`encoder.Encode`, `ast.Node.MarshalJSON`等来提高性能,这可能会在服务器负载高时产生更多的内存使用(in-use)。参见[issue 614](https://github.com/bytedance/sonic/issues/614)。因此,我们引入了一些选项来让用户配置内存池的行为。参见[option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables)包。
## 社区
Sonic 是 [CloudWeGo](https://www.cloudwego.io/) 下的一个子项目。我们致力于构建云原生生态系统。
+242
View File
@@ -0,0 +1,242 @@
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sonic
import (
`io`
`github.com/bytedance/sonic/ast`
`github.com/bytedance/sonic/internal/rt`
)
// The constants below are the possible values of APIKind; they are numbered
// by iota, so UseStdJSON is 0 and UseSonicJSON is 1.
const (
// UseStdJSON indicates that the fallback implementation (encoding/json) is in use.
UseStdJSON = iota
// UseSonicJSON indicates that the real sonic implementation is in use.
UseSonicJSON
)
// APIKind is the kind of API in use: 0 (UseStdJSON) is std json,
// 1 (UseSonicJSON) is sonic.
// NOTE(review): apiKind is defined elsewhere in the package, presumably
// selected per build target — confirm.
const APIKind = apiKind
// Config is a combination of sonic/encoder.Options and sonic/decoder.Options.
// The zero value leaves every option disabled.
type Config struct {
// EscapeHTML tells the encoder to escape all HTML characters
// after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
// WARNING: This hurts performance A LOT, USE WITH CARE.
EscapeHTML bool
// SortMapKeys tells the encoder that the keys of a map need to be sorted
// before serializing into JSON.
// WARNING: This hurts performance A LOT, USE WITH CARE.
SortMapKeys bool
// CompactMarshaler tells the encoder that the output JSON from json.Marshaler
// is always compact and needs no validation.
// NOTE(review): the project README describes this option as adding a
// compacting step for json.Marshaler output — confirm which wording is accurate.
CompactMarshaler bool
// NoQuoteTextMarshaler tells the encoder that the output text from encoding.TextMarshaler
// is always an escaped string and needs no quoting.
NoQuoteTextMarshaler bool
// NoNullSliceOrMap tells the encoder to encode every empty Array or Object as '[]' or '{}'
// instead of 'null'.
NoNullSliceOrMap bool
// UseInt64 tells the decoder to unmarshal an integer into an interface{} as an
// int64 instead of as a float64.
UseInt64 bool
// UseNumber tells the decoder to unmarshal a number into an interface{} as a
// json.Number instead of as a float64.
UseNumber bool
// UseUnicodeErrors tells the decoder to return an error when it encounters invalid
// UTF-8 escape sequences.
UseUnicodeErrors bool
// DisallowUnknownFields tells the decoder to return an error when the destination
// is a struct and the input contains object keys which do not match any
// non-ignored, exported fields in the destination.
DisallowUnknownFields bool
// CopyString tells the decoder to decode string values by copying instead of referring.
CopyString bool
// ValidateString tells the decoder and encoder to validate string values: the decoder
// will return errors when there are unescaped control chars (\u0000-\u001f) in a
// string value of the JSON.
ValidateString bool
// NoValidateJSONMarshaler tells the encoder not to validate the output string
// after encoding the JSONMarshaler to JSON.
NoValidateJSONMarshaler bool
// NoEncoderNewline tells the encoder not to add a newline after every message.
NoEncoderNewline bool
// EncodeNullForInfOrNan tells the encoder to encode an Infinity or NaN float
// as `null` instead of returning an error.
EncodeNullForInfOrNan bool
}
var (
// ConfigDefault is the default config of APIs, aiming at efficiency and safety.
// It is built from the zero-value Config (no option enabled).
ConfigDefault = Config{}.Froze()
// ConfigStd is the standard config of APIs, aiming at being compatible with encoding/json.
// It enables EscapeHTML, SortMapKeys, CompactMarshaler, CopyString and ValidateString.
ConfigStd = Config{
EscapeHTML : true,
SortMapKeys: true,
CompactMarshaler: true,
CopyString : true,
ValidateString : true,
}.Froze()
// ConfigFastest is the fastest config of APIs, aiming at speed.
// It enables NoQuoteTextMarshaler and NoValidateJSONMarshaler.
ConfigFastest = Config{
NoQuoteTextMarshaler: true,
NoValidateJSONMarshaler: true,
}.Froze()
)
// API is a binding of a specific config.
// This interface is inspired by github.com/json-iterator/go,
// and has the same behaviors under an equivalent config.
type API interface {
// MarshalToString returns the JSON encoding string of v.
MarshalToString(v interface{}) (string, error)
// Marshal returns the JSON encoding bytes of v.
Marshal(v interface{}) ([]byte, error)
// MarshalIndent returns the JSON encoding bytes of v with indent and prefix.
MarshalIndent(v interface{}, prefix, indent string) ([]byte, error)
// UnmarshalFromString parses the JSON-encoded string and stores the result in the value pointed to by v.
UnmarshalFromString(str string, v interface{}) error
// Unmarshal parses the JSON-encoded bytes and stores the result in the value pointed to by v.
Unmarshal(data []byte, v interface{}) error
// NewEncoder creates an Encoder holding writer.
NewEncoder(writer io.Writer) Encoder
// NewDecoder creates a Decoder holding reader.
NewDecoder(reader io.Reader) Decoder
// Valid validates the JSON-encoded bytes and reports whether they are valid.
Valid(data []byte) bool
}
// Encoder encodes JSON into an io.Writer.
type Encoder interface {
// Encode writes the JSON encoding of val to the stream, followed by a newline character.
Encode(val interface{}) error
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is NOT to escape.
SetEscapeHTML(on bool)
// SetIndent instructs the encoder to format each subsequent encoded value
// as if indented by the package-level function Indent(dst, src, prefix, indent).
// Calling SetIndent("", "") disables indentation.
SetIndent(prefix, indent string)
}
// Decoder decodes JSON from an io.Reader.
type Decoder interface {
// Decode reads the next JSON-encoded value from its input and stores it in the value pointed to by val.
Decode(val interface{}) error
// Buffered returns a reader of the data remaining in the Decoder's buffer.
// The reader is valid until the next call to Decode.
Buffered() io.Reader
// DisallowUnknownFields causes the Decoder to return an error when the destination is a struct
// and the input contains object keys which do not match any non-ignored, exported fields in the destination.
DisallowUnknownFields()
// More reports whether there is another element in the current array or object being parsed.
More() bool
// UseNumber causes the Decoder to unmarshal a number into an interface{} as a Number instead of as a float64.
UseNumber()
}
// Marshal returns the JSON encoding bytes of val.
// It delegates to ConfigDefault.Marshal.
func Marshal(val interface{}) ([]byte, error) {
return ConfigDefault.Marshal(val)
}
// MarshalIndent is like Marshal but applies Indent to format the output.
// Each JSON element in the output will begin on a new line beginning with prefix
// followed by one or more copies of indent according to the indentation nesting.
// It delegates to ConfigDefault.MarshalIndent.
func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
return ConfigDefault.MarshalIndent(v, prefix, indent)
}
// MarshalString returns the JSON encoding string of val.
// It delegates to ConfigDefault.MarshalToString.
func MarshalString(val interface{}) (string, error) {
return ConfigDefault.MarshalToString(val)
}
// Unmarshal parses the JSON-encoded data in buf and stores the result in the value pointed to by val.
// It delegates to ConfigDefault.Unmarshal.
// NOTICE: This API copies the given buffer by default;
// if you want to pass JSON more efficiently, use UnmarshalString instead.
func Unmarshal(buf []byte, val interface{}) error {
return ConfigDefault.Unmarshal(buf, val)
}
// UnmarshalString is like Unmarshal, except buf is a string.
// It delegates to ConfigDefault.UnmarshalFromString.
func UnmarshalString(buf string, val interface{}) error {
return ConfigDefault.UnmarshalFromString(buf, val)
}
// Get searches for and locates the given path in the src JSON,
// and returns an ast.Node representing the located part of the JSON.
//
// Each path arg must be an integer or a string:
// - Integer is the target index (>=0), meaning the current node is searched as an array.
// - String is the target key, meaning the current node is searched as an object.
//
//
// Notice: It expects the src JSON to be **Well-formed** and **Immutable** when calling,
// otherwise it may return an unexpected result.
// Considering memory safety, the returned JSON is **Copied** from the input
// (the call is forwarded to GetCopyFromString).
func Get(src []byte, path ...interface{}) (ast.Node, error) {
return GetCopyFromString(rt.Mem2Str(src), path...)
}
// GetWithOptions searches for and locates the given path in the src JSON,
// using the supplied options of ast.Searcher.
//
// A searcher is created over src, opts is assigned to its SearchOptions
// field, and the lookup result of GetByPath is returned unchanged.
func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) {
	searcher := ast.NewSearcher(rt.Mem2Str(src))
	searcher.SearchOptions = opts
	return searcher.GetByPath(path...)
}
// GetFromString is the same as Get except that src is a string.
//
// WARNING: The returned JSON is **Referenced** from the input.
// Caching or long-time holding of the returned node may cause OOM.
// If your src is big, consider using GetCopyFromString().
func GetFromString(src string, path ...interface{}) (ast.Node, error) {
return ast.NewSearcher(src).GetByPath(path...)
}
// GetCopyFromString is the same as Get except that src is a string.
// It looks the path up with the searcher's GetByPathCopy method.
func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) {
return ast.NewSearcher(src).GetByPathCopy(path...)
}
// Valid reports whether data is a valid JSON encoding.
// It delegates to ConfigDefault.Valid.
func Valid(data []byte) bool {
return ConfigDefault.Valid(data)
}
// ValidString reports whether data is a valid JSON encoding.
// It is the string counterpart of Valid: data is converted with rt.Str2Mem
// and passed to ConfigDefault.Valid.
func ValidString(data string) bool {
return ConfigDefault.Valid(rt.Str2Mem(data))
}
+135
View File
@@ -0,0 +1,135 @@
//go:build (amd64 && go1.17 && !go1.24) || (arm64 && go1.20 && !go1.24)
// +build amd64,go1.17,!go1.24 arm64,go1.20,!go1.24
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`runtime`
`unsafe`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
uq `github.com/bytedance/sonic/unquote`
`github.com/bytedance/sonic/utf8`
)
// typeByte is the Type field obtained by unpacking an interface value that
// holds byte(0); quote passes it to rt.GrowSlice when enlarging its buffer.
var typeByte = rt.UnpackEface(byte(0)).Type
// quote appends val to *buf surrounded by double quotes, using native.Quote
// to produce the content between the quotes.
//
// The output is written straight into the spare capacity of *buf. When
// native.Quote returns a negative value, the buffer capacity is doubled and
// quoting resumes at the first input byte that was not consumed.
//
//go:nocheckptr
func quote(buf *[]byte, val string) {
*buf = append(*buf, '"')
// empty input: only the closing quote is needed
if len(val) == 0 {
*buf = append(*buf, '"')
return
}
// sp/nb: pointer to and length of the input that is still to be quoted
sp := rt.IndexChar(val, 0)
nb := len(val)
// b views the slice header of *buf so Len/Cap can be updated in place
b := (*rt.GoSlice)(unsafe.Pointer(buf))
// input buffer
for nb > 0 {
// output buffer: starts right after the bytes already in *buf
dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len))
dn := b.Cap - b.Len
// call native.Quote, dn is byte count it outputs
ret := native.Quote(sp, nb, dp, &dn, 0)
// update *buf length
b.Len += dn
// no need more output
if ret >= 0 {
break
}
// double buf size
*b = rt.GrowSlice(typeByte, *b, b.Cap*2)
// ret is the complement of consumed input
ret = ^ret
// update input buffer
nb -= ret
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
}
// keep buf and sp alive until the native calls above are done
runtime.KeepAlive(buf)
runtime.KeepAlive(sp)
*buf = append(*buf, '"')
}
// unquote unescapes src by delegating to String from the sonic unquote
// package (imported here as uq).
func unquote(src string) (string, types.ParsingError) {
return uq.String(src)
}
// decodeValue decodes the value at the parser's current position with
// native.Value and stores the position it returns back into self.p.
//
// Without a digit buffer the call is made with types.F_USE_NUMBER; when
// self.dbuf is set, the flag is cleared and the buffer (with capacity
// types.MaxDigitNums) is handed to the native call through the result state.
func (self *Parser) decodeValue() (val types.JsonState) {
	flags := uint64(types.F_USE_NUMBER)
	if self.dbuf != nil {
		flags = 0
		val.Dbuf = self.dbuf
		val.Dcap = types.MaxDigitNums
	}
	src := (*rt.GoString)(unsafe.Pointer(&self.s))
	self.p = native.Value(src.Ptr, src.Len, self.p, &val, flags)
	return val
}
// skip skips one value with native.SkipOne, using a state machine that is
// obtained for the call and freed right after it.
//
// A non-negative result is returned as the start offset. A negative result
// is negated into a types.ParsingError and returned with the parser position.
func (self *Parser) skip() (int, types.ParsingError) {
	machine := types.NewStateMachine()
	pos := native.SkipOne(&self.s, &self.p, machine, 0)
	types.FreeStateMachine(machine)
	if pos >= 0 {
		return pos, 0
	}
	return self.p, types.ParsingError(-pos)
}
// encodeInterface encodes the value returned by self.packAny() into buf
// through encoder.EncodeInto, passing the encoder.NoEncoderNewline option.
func (self *Node) encodeInterface(buf *[]byte) error {
//WARN: NOT compatible with json.Encoder
return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline)
}
// skipFast skips one value with native.SkipOneFast (no state machine is used).
//
// A non-negative result is returned as the start offset. A negative result
// is negated into a types.ParsingError and returned with the parser position.
func (self *Parser) skipFast() (int, types.ParsingError) {
	pos := native.SkipOneFast(&self.s, &self.p)
	if pos >= 0 {
		return pos, 0
	}
	return self.p, types.ParsingError(-pos)
}
// getByPath locates path in the parser's source with native.GetByPath.
//
// When validate is true a state machine is obtained for the call and freed
// afterwards; otherwise a nil state machine is passed. A non-negative result
// is returned as the start offset. A negative result is negated into a
// types.ParsingError and returned with the parser position.
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
	var machine *types.StateMachine
	if validate {
		machine = types.NewStateMachine()
	}
	pos := native.GetByPath(&self.s, &self.p, &path, machine)
	if validate {
		types.FreeStateMachine(machine)
	}
	// path is handed to native code by address; keep it alive past the call
	runtime.KeepAlive(path)
	if pos >= 0 {
		return pos, 0
	}
	return self.p, types.ParsingError(-pos)
}
// validate_utf8 returns the result of ValidateString from the sonic utf8
// package for str.
func validate_utf8(str string) bool {
return utf8.ValidateString(str)
}
+114
View File
@@ -0,0 +1,114 @@
// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`encoding/json`
`unicode/utf8`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
// init prints a warning when this fallback file is compiled in, i.e. when the
// build constraints at the top of the file select it.
func init() {
println("WARNING:(ast) sonic only supports go1.17~1.23, but your environment is not suitable")
}
// quote appends the quoted form of val to buf by delegating to quoteString.
func quote(buf *[]byte, val string) {
quoteString(buf, val)
}
// unquote unescapes an internal JSON string (src does not include the quotes
// at its beginning and end).
//
// It returns types.ERR_INVALID_ESCAPE when unquoteBytes reports failure.
//
// NOTE(review): the byte view built below starts one position before src and
// is len(src)+2 bytes long, so src is presumably expected to sit between its
// enclosing quote characters in memory — confirm against the callers.
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
}
// decodeValue decodes the value at the parser's current position with the
// package-level decodeValue helper; its third argument is true when the
// parser has no digit buffer.
//
// The parser position is advanced only when the helper returns a
// non-negative end offset; the decoded state is returned in either case.
func (self *Parser) decodeValue() (val types.JsonState) {
	end, state := decodeValue(self.s, self.p, self.dbuf == nil)
	if end >= 0 {
		self.p = end
	}
	return state
}
// skip skips one value with skipValue.
//
// On success the parser position moves to the returned end offset and the
// start offset is returned. A negative end offset is negated into a
// types.ParsingError and returned with the unchanged parser position.
func (self *Parser) skip() (int, types.ParsingError) {
	end, start := skipValue(self.s, self.p)
	if end >= 0 {
		self.p = end
		return start, 0
	}
	return self.p, types.ParsingError(-end)
}
// skipFast skips one value with skipValueFast.
//
// On success the parser position moves to the returned end offset and the
// start offset is returned. A negative end offset is negated into a
// types.ParsingError and returned with the unchanged parser position.
func (self *Parser) skipFast() (int, types.ParsingError) {
	end, start := skipValueFast(self.s, self.p)
	if end >= 0 {
		self.p = end
		return start, 0
	}
	return self.p, types.ParsingError(-end)
}
// encodeInterface marshals the value returned by self.packAny() with
// encoding/json and appends the encoded bytes to buf.
//
// When marshaling fails, buf is left untouched and the error is returned.
func (self *Node) encodeInterface(buf *[]byte) error {
	encoded, err := json.Marshal(self.packAny())
	if err == nil {
		*buf = append(*buf, encoded...)
	}
	return err
}
// getByPath walks path one element at a time and then skips the value that
// was reached, returning its start offset.
//
// Each path element must be an int (>=0), handled by searchIndex, or a
// string, handled by searchKey; anything else panics. The final value is
// skipped with skip when validate is true and with skipFast otherwise. On any
// error the current parser position is returned together with the error.
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
	for _, elem := range path {
		var err types.ParsingError
		switch step := elem.(type) {
		case int:
			if step < 0 {
				panic("path must be either int(>=0) or string")
			}
			err = self.searchIndex(step)
		case string:
			err = self.searchKey(step)
		default:
			panic("path must be either int(>=0) or string")
		}
		if err != 0 {
			return self.p, err
		}
	}

	skipper := self.skipFast
	if validate {
		skipper = self.skip
	}
	start, err := skipper()
	if err != 0 {
		return self.p, err
	}
	return start, 0
}
// validate_utf8 reports whether str is valid UTF-8, using ValidString from
// the standard unicode/utf8 package.
func validate_utf8(str string) bool {
return utf8.ValidString(str)
}
View File
+470
View File
@@ -0,0 +1,470 @@
/**
* Copyright 2023 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
"sort"
"unsafe"
"github.com/bytedance/sonic/internal/caching"
)
// nodeChunk is a fixed-size array of _DEFAULT_NODE_CAP nodes.
type nodeChunk [_DEFAULT_NODE_CAP]Node
// linkedNodes stores Nodes in fixed-size chunks: the first chunk is embedded
// in the struct and further chunks are referenced from tail.
type linkedNodes struct {
// head holds positions [0, _DEFAULT_NODE_CAP)
head nodeChunk
// tail[k] holds positions [(k+1)*_DEFAULT_NODE_CAP, (k+2)*_DEFAULT_NODE_CAP)
tail []*nodeChunk
// size is the number of positions in use
size int
}
// Cap returns the number of positions covered by the head chunk plus every
// entry of tail; a nil receiver has capacity 0.
func (self *linkedNodes) Cap() int {
	if self != nil {
		return (1 + len(self.tail)) * _DEFAULT_NODE_CAP
	}
	return 0
}
// Len returns the number of positions in use; a nil receiver has length 0.
func (self *linkedNodes) Len() int {
	if self != nil {
		return self.size
	}
	return 0
}
// At returns a pointer to the node at position i, or nil when the receiver is
// nil, i is outside [0, size), or the tail has no entry for i's chunk.
func (self *linkedNodes) At(i int) (*Node) {
	if self == nil || i < 0 || i >= self.size {
		return nil
	}
	if i < _DEFAULT_NODE_CAP {
		return &self.head[i]
	}
	chunk, offset := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
	if chunk >= len(self.tail) {
		return nil
	}
	return &self.tail[chunk][offset]
}
// MoveOne moves the node at position source to position target, shifting
// every node in between by one position towards source.
//
// Nothing happens when source equals target or when either position lies
// outside [0, size).
func (self *linkedNodes) MoveOne(source int, target int) {
	if source == target ||
		source < 0 || source >= self.size ||
		target < 0 || target >= self.size {
		return
	}

	// remember the node being moved before it gets overwritten
	moved := *self.At(source)

	// walk from source towards target, pulling each neighbour one step closer
	step := 1
	if source > target {
		step = -1
	}
	for pos := source; pos != target; pos += step {
		*self.At(pos) = *self.At(pos + step)
	}

	*self.At(target) = moved
}
// Pop clears the last node and shrinks the list by one. It does nothing on a
// nil or empty list.
func (self *linkedNodes) Pop() {
	if self != nil && self.size != 0 {
		self.Set(self.size-1, Node{})
		self.size--
	}
}
// Push appends v by storing it at position size; Set then grows size by one.
func (self *linkedNodes) Push(v Node) {
self.Set(self.size, v)
}
// Set stores v at position i, allocating tail chunks as needed, and raises
// size to i+1 when i is at or beyond the current size.
//
// Positions below _DEFAULT_NODE_CAP live in head; all others live in
// tail[i/_DEFAULT_NODE_CAP-1]. The former "a < 0" branch was dropped: it
// could never run because every i below _DEFAULT_NODE_CAP is handled by the
// head case first.
func (self *linkedNodes) Set(i int, v Node) {
	if i < _DEFAULT_NODE_CAP {
		self.head[i] = v
	} else {
		a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
		// make sure tail[a] exists before it is addressed
		self.growTailLength(a + 1)
		if self.tail[a] == nil {
			self.tail[a] = new(nodeChunk)
		}
		self.tail[a][b] = v
	}
	if self.size <= i {
		self.size = i + 1
	}
}
// growTailLength makes sure tail has at least l entries.
//
// When the current capacity suffices, tail is only re-sliced to length l.
// Otherwise the capacity is grown in steps of 1 + c>>_APPEND_GROW_SHIFT until
// it reaches l, and the existing entries are copied into a new slice.
func (self *linkedNodes) growTailLength(l int) {
	if len(self.tail) >= l {
		return
	}
	oldCap := cap(self.tail)
	newCap := oldCap
	for newCap < l {
		newCap += 1 + (newCap >> _APPEND_GROW_SHIFT)
	}
	if newCap != oldCap {
		grown := make([]*nodeChunk, l, newCap)
		copy(grown, self.tail)
		self.tail = grown
		return
	}
	self.tail = self.tail[:l]
}
// ToSlice copies all nodes in use into con, in position order.
// It does nothing when con is shorter than the list.
func (self *linkedNodes) ToSlice(con []Node) {
	if self.size > len(con) {
		return
	}
	last := self.size - 1
	chunk, offset := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
	if chunk < 0 {
		// everything lives in head
		copy(con, self.head[:offset+1])
		return
	}
	copy(con, self.head[:])
	dst := con[_DEFAULT_NODE_CAP:]
	// full tail chunks first, then the partially used last one
	for k := 0; k < chunk; k++ {
		copy(dst, self.tail[k][:])
		dst = dst[_DEFAULT_NODE_CAP:]
	}
	copy(dst, self.tail[chunk][:offset+1])
}
// FromSlice loads the list from con: size becomes len(con), the first
// _DEFAULT_NODE_CAP nodes go into head and the rest into freshly allocated
// tail chunks.
func (self *linkedNodes) FromSlice(con []Node) {
	total := len(con)
	self.size = total
	last := total - 1
	chunk, offset := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
	if chunk < 0 {
		// everything fits into head
		copy(self.head[:offset+1], con)
		return
	}
	copy(self.head[:], con)
	rest := con[_DEFAULT_NODE_CAP:]

	need := chunk + 1
	if cap(self.tail) < need {
		self.tail = make([]*nodeChunk, need, need+(need>>_APPEND_GROW_SHIFT))
	}
	self.tail = self.tail[:need]

	// full tail chunks first, then the partially used last one
	for k := 0; k < chunk; k++ {
		self.tail[k] = new(nodeChunk)
		copy(self.tail[k][:], rest)
		rest = rest[_DEFAULT_NODE_CAP:]
	}
	self.tail[chunk] = new(nodeChunk)
	copy(self.tail[chunk][:offset+1], rest)
}
// pairChunk is a fixed-size array of _DEFAULT_NODE_CAP pairs.
type pairChunk [_DEFAULT_NODE_CAP]Pair
// linkedPairs stores Pairs in fixed-size chunks (the first one embedded, the
// others referenced from tail), with an optional hash index for lookups.
type linkedPairs struct {
// index maps a pair's hash to its position; nil until BuildIndex is called
index map[uint64]int
// head holds positions [0, _DEFAULT_NODE_CAP)
head pairChunk
// tail[k] holds positions [(k+1)*_DEFAULT_NODE_CAP, (k+2)*_DEFAULT_NODE_CAP)
tail []*pairChunk
// size is the number of positions in use
size int
}
// BuildIndex creates the hash index if it does not exist yet and records the
// position of every pair in use under the pair's stored hash.
func (self *linkedPairs) BuildIndex() {
	if self.index == nil {
		self.index = make(map[uint64]int, self.size)
	}
	for pos, n := 0, self.size; pos < n; pos++ {
		self.index[self.At(pos).hash] = pos
	}
}
// Cap returns the number of positions covered by the head chunk plus every
// entry of tail; a nil receiver has capacity 0.
func (self *linkedPairs) Cap() int {
	if self != nil {
		return (1 + len(self.tail)) * _DEFAULT_NODE_CAP
	}
	return 0
}
// Len returns the number of positions in use; a nil receiver has length 0.
func (self *linkedPairs) Len() int {
	if self != nil {
		return self.size
	}
	return 0
}
// At returns a pointer to the pair at position i, or nil when the receiver is
// nil, i is outside [0, size), or the tail has no entry for i's chunk.
func (self *linkedPairs) At(i int) *Pair {
	if self == nil || i < 0 || i >= self.size {
		return nil
	}
	if i < _DEFAULT_NODE_CAP {
		return &self.head[i]
	}
	chunk, offset := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
	if chunk >= len(self.tail) {
		return nil
	}
	return &self.tail[chunk][offset]
}
// Push appends v by storing it at position size through Set, which also
// records it in the hash index when one exists.
func (self *linkedPairs) Push(v Pair) {
self.Set(self.size, v)
}
// Pop unsets the last pair and shrinks the list by one. It does nothing on a
// nil or empty list.
func (self *linkedPairs) Pop() {
	if self != nil && self.size != 0 {
		self.Unset(self.size - 1)
		self.size--
	}
}
// Unset replaces the pair at position i with the zero Pair. When a hash index
// exists, the entry stored under the old pair's hash is deleted first.
func (self *linkedPairs) Unset(i int) {
	if self.index != nil {
		delete(self.index, self.At(i).hash)
	}
	self.set(i, Pair{})
}
// Set stores v at position i. When a hash index exists, position i is
// recorded under v's stored hash before the pair is written.
func (self *linkedPairs) Set(i int, v Pair) {
	if self.index != nil {
		self.index[v.hash] = i
	}
	self.set(i, v)
}
// set stores v at position i without touching the hash index, allocating
// tail chunks as needed, and raises size to i+1 when i is at or beyond the
// current size.
//
// Positions below _DEFAULT_NODE_CAP live in head; all others live in
// tail[i/_DEFAULT_NODE_CAP-1]. The former "a < 0" branch was dropped: it
// could never run because every i below _DEFAULT_NODE_CAP is handled by the
// head case first.
func (self *linkedPairs) set(i int, v Pair) {
	if i < _DEFAULT_NODE_CAP {
		self.head[i] = v
	} else {
		a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
		// make sure tail[a] exists before it is addressed
		self.growTailLength(a + 1)
		if self.tail[a] == nil {
			self.tail[a] = new(pairChunk)
		}
		self.tail[a][b] = v
	}
	if self.size <= i {
		self.size = i + 1
	}
}
// growTailLength makes sure tail has at least l entries.
//
// When the current capacity suffices, tail is only re-sliced to length l.
// Otherwise the capacity is grown in steps of 1 + c>>_APPEND_GROW_SHIFT until
// it reaches l, and the existing entries are copied into a new slice.
func (self *linkedPairs) growTailLength(l int) {
	if len(self.tail) >= l {
		return
	}
	oldCap := cap(self.tail)
	newCap := oldCap
	for newCap < l {
		newCap += 1 + (newCap >> _APPEND_GROW_SHIFT)
	}
	if newCap != oldCap {
		grown := make([]*pairChunk, l, newCap)
		copy(grown, self.tail)
		self.tail = grown
		return
	}
	self.tail = self.tail[:l]
}
// Get looks up the pair whose Key equals key and returns it together with its
// position, or (nil, -1) when there is none.
//
// With a hash index, a missing hash means "not found" right away and a hit
// with a matching key is returned directly; a hit with a different key (hash
// conflict) falls back to the linear scan that is also used when no index
// exists.
func (self *linkedPairs) Get(key string) (*Pair, int) {
	if self.index != nil {
		pos, found := self.index[caching.StrHash(key)]
		if !found {
			return nil, -1
		}
		if hit := self.At(pos); hit.Key == key {
			return hit, pos
		}
		// hash conflict: fall through to the linear scan
	}
	for pos := 0; pos < self.size; pos++ {
		if candidate := self.At(pos); candidate.Key == key {
			return candidate, pos
		}
	}
	return nil, -1
}
// ToSlice copies all pairs in use into con, in position order.
// It does nothing when con is shorter than the list.
func (self *linkedPairs) ToSlice(con []Pair) {
	if self.size > len(con) {
		return
	}
	last := self.size - 1
	chunk, offset := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
	if chunk < 0 {
		// everything lives in head
		copy(con, self.head[:offset+1])
		return
	}
	copy(con, self.head[:])
	dst := con[_DEFAULT_NODE_CAP:]
	// full tail chunks first, then the partially used last one
	for k := 0; k < chunk; k++ {
		copy(dst, self.tail[k][:])
		dst = dst[_DEFAULT_NODE_CAP:]
	}
	copy(dst, self.tail[chunk][:offset+1])
}
// ToMap stores every pair in use into con as Key -> Value, visiting the
// pairs in position order (a later pair overwrites an earlier one with the
// same key).
func (self *linkedPairs) ToMap(con map[string]Node) {
	for pos, n := 0, self.size; pos < n; pos++ {
		pair := self.At(pos)
		con[pair.Key] = pair.Value
	}
}
// copyPairs copies from into to and, when a hash index exists, indexes the
// first l copied pairs as positions 0..l-1.
//
// It is kept for existing callers and forwards to copyPairsAt with base 0.
func (self *linkedPairs) copyPairs(to []Pair, from []Pair, l int) {
	self.copyPairsAt(to, from, l, 0)
}

// copyPairsAt copies from into to, where to[0] is the pair at list position
// base. When a hash index exists, the hash of each of the first l pairs is
// computed, stored in the copied pair and recorded in the index under the
// pair's list position (base+i).
//
// Two defects of the previous helper are fixed here: the computed hash was
// written only to from (after the copy had been made), leaving the stored
// pair with whatever hash the caller passed; and the index received the
// chunk-local offset, which is not the position At expects for tail chunks.
func (self *linkedPairs) copyPairsAt(to []Pair, from []Pair, l int, base int) {
	copy(to, from)
	if self.index == nil {
		return
	}
	for i := 0; i < l; i++ {
		// NOTICE: the caller may not have filled in the hash, so compute it here
		h := caching.StrHash(from[i].Key)
		// from keeps being updated as before, for backward compatibility
		from[i].hash = h
		to[i].hash = h
		self.index[h] = base + i
	}
}

// FromSlice loads the list from con: size becomes len(con), the first
// _DEFAULT_NODE_CAP pairs go into head and the rest into freshly allocated
// tail chunks. When a hash index exists, every loaded pair is recorded in it
// under its list position.
func (self *linkedPairs) FromSlice(con []Pair) {
	self.size = len(con)
	i := self.size - 1
	a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
	if a < 0 {
		// everything fits into head
		self.copyPairsAt(self.head[:b+1], con, b+1, 0)
		return
	}
	self.copyPairsAt(self.head[:], con, len(self.head), 0)
	con = con[_DEFAULT_NODE_CAP:]
	// base is the list position of the first pair of the chunk being filled
	base := _DEFAULT_NODE_CAP

	if cap(self.tail) <= a {
		c := (a + 1) + (a+1)>>_APPEND_GROW_SHIFT
		self.tail = make([]*pairChunk, a+1, c)
	}
	self.tail = self.tail[:a+1]

	// full tail chunks first, then the partially used last one
	for i := 0; i < a; i++ {
		self.tail[i] = new(pairChunk)
		self.copyPairsAt(self.tail[i][:], con, len(self.tail[i]), base)
		con = con[_DEFAULT_NODE_CAP:]
		base += _DEFAULT_NODE_CAP
	}

	self.tail[a] = new(pairChunk)
	self.copyPairsAt(self.tail[a][:b+1], con, b+1, base)
}
// Less reports whether the key at position i orders before the key at
// position j, comparing the two keys with lessFrom from offset 0.
func (self *linkedPairs) Less(i, j int) bool {
return lessFrom(self.At(i).Key, self.At(j).Key, 0)
}
// Swap exchanges the pairs at positions i and j. When a hash index exists,
// the positions recorded under both pairs' hashes are exchanged first.
func (self *linkedPairs) Swap(i, j int) {
	first, second := self.At(i), self.At(j)
	if self.index != nil {
		self.index[first.hash] = j
		self.index[second.hash] = i
	}
	held := *first
	*first = *second
	*second = held
}
// Sort sorts the pairs with sort.Stable, using this type's Len, Less and
// Swap methods.
func (self *linkedPairs) Sort() {
sort.Stable(self)
}
// lessFrom reports whether a orders before b, comparing byte by byte and
// starting at offset d.
//
// The first differing byte within the common length decides the result. When
// there is none (or d is already past the common length), the shorter string
// orders first.
func lessFrom(a, b string, d int) bool {
	limit := len(b)
	if len(a) <= limit {
		limit = len(a)
	}
	for i := d; i < limit; i++ {
		if x, y := a[i], b[i]; x != y {
			return x < y
		}
	}
	return len(a) < len(b)
}
// parseObjectStack bundles a Parser with the pairs stored for a lazily
// loaded object node (see newLazyObject).
type parseObjectStack struct {
// parser is a copy of the parser the lazy node was created from
parser Parser
// v holds the object's pairs
v linkedPairs
}
// parseArrayStack bundles a Parser with the nodes stored for a lazily
// loaded array node (see newLazyArray).
type parseArrayStack struct {
// parser is a copy of the parser the lazy node was created from
parser Parser
// v holds the array's nodes
v linkedNodes
}
// newLazyArray returns a node of type _V_ARRAY_LAZY whose pointer refers to a
// new parseArrayStack holding a copy of *p and an empty node list.
func newLazyArray(p *Parser) Node {
	stack := &parseArrayStack{parser: *p}
	return Node{
		t: _V_ARRAY_LAZY,
		p: unsafe.Pointer(stack),
	}
}
// newLazyObject returns a node of type _V_OBJECT_LAZY whose pointer refers to
// a new parseObjectStack holding a copy of *p and an empty pair list.
func newLazyObject(p *Parser) Node {
	stack := &parseObjectStack{parser: *p}
	return Node{
		t: _V_OBJECT_LAZY,
		p: unsafe.Pointer(stack),
	}
}
// getParserAndArrayStack reinterprets self.p as a *parseArrayStack and
// returns the stack together with a pointer to its embedded parser.
// NOTE(review): no type check is made here; callers presumably only use it on
// nodes created by newLazyArray — confirm.
func (self *Node) getParserAndArrayStack() (*Parser, *parseArrayStack) {
stack := (*parseArrayStack)(self.p)
return &stack.parser, stack
}
// getParserAndObjectStack reinterprets self.p as a *parseObjectStack and
// returns the stack together with a pointer to its embedded parser.
// NOTE(review): no type check is made here; callers presumably only use it on
// nodes created by newLazyObject — confirm.
func (self *Node) getParserAndObjectStack() (*Parser, *parseObjectStack) {
stack := (*parseObjectStack)(self.p)
return &stack.parser, stack
}

Some files were not shown because too many files have changed in this diff Show More