vramestimator

package
v1.28.14 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 10, 2025 License: MIT Imports: 20 Imported by: 1

Documentation

Index

Constants

View Source
// CUDASize is a fixed size of 500 MiB expressed in bytes (524,288,000).
// NOTE(review): presumably the allowance reserved for CUDA overhead — confirm
// against the callers, which are not visible here.
const CUDASize = 500 << 20

Variables

View Source
// EXL2Options is declared without an initial value, so it starts out as a nil
// slice. NOTE(review): presumably it is filled in elsewhere in the package —
// confirm where before relying on its contents.
var EXL2Options []float64

EXL2Options contains the EXL2 quantisation options

View Source
// GGUFMapping gives the bits per weight for each GGUF quantisation name.
// Keys are matched exactly as written (upper case). The short names (FP16,
// Q8, Q6, Q5, Q4, Q3, Q2) carry the same value as one of the longer names in
// their group.
var GGUFMapping = map[string]float64{
	// 16-bit
	"F16":  16,
	"FP16": 16,

	// Q8
	"Q8_0": 8.5,
	"Q8":   8.5,

	// Q6
	"Q6_K": 6.59,
	"Q6":   6.59,

	// Q5
	"Q5_K_L": 5.75,
	"Q5_K_M": 5.69,
	"Q5_K_S": 5.54,
	"Q5_0":   5.54,
	"Q5":     5.54,

	// Q4 and IQ4
	"Q4_K_L": 4.9,
	"Q4_K_M": 4.85,
	"Q4_K_S": 4.58,
	"Q4_0":   4.55,
	"Q4":     4.55,
	"IQ4_NL": 4.5,
	"IQ4_XS": 4.25,

	// Q3 and IQ3
	"Q3_K_L":  4.27,
	"Q3_K_M":  3.91,
	"Q3_K_S":  3.5,
	"Q3":      3.5,
	"IQ3_M":   3.7,
	"IQ3_S":   3.5,
	"IQ3_XS":  3.3,
	"IQ3_XXS": 3.06,

	// Q2 and IQ2
	"Q2_K":    3.35,
	"Q2":      3.35,
	"IQ2_M":   2.7,
	"IQ2_S":   2.5,
	"IQ2_XS":  2.31,
	"IQ2_XXS": 2.06,

	// IQ1
	"IQ1_S": 1.56,
}

GGUFMapping maps GGUF quantisation types to their corresponding bits per weight

Functions

func CalculateBPW

func CalculateBPW(modelID string, memory float64, context int, kvCacheQuant KVCacheQuantisation, quantType string, ollamaModelInfo *OllamaModelInfo) (interface{}, error)

CalculateBPW calculates the best BPW for a given memory and context constraint

func CalculateContext

func CalculateContext(modelID string, memory, bpw float64, kvCacheQuant KVCacheQuantisation, ollamaModelInfo *OllamaModelInfo, topContext int) (int, error)

CalculateContext calculates the maximum context for a given memory constraint

func CalculateVRAM

func CalculateVRAM(modelID string, bpw float64, context int, kvCacheQuant KVCacheQuantisation, ollamaModelInfo *OllamaModelInfo) (float64, error)

CalculateVRAM calculates the VRAM usage for a given model and configuration

func CalculateVRAMRaw

func CalculateVRAMRaw(config ModelConfig, bpwValues BPWValues, context int, numGPUs int, gqa bool) float64

CalculateVRAMRaw calculates the raw VRAM usage

func DownloadFile

func DownloadFile(url, filePath string, headers map[string]string) error

DownloadFile downloads a file from a URL and saves it to the specified path

func EstimateVRAM added in v1.26.0

func EstimateVRAM(modelIdentifier, apiURL string, fitsVRAM float64) error

func GetAvailableMemory added in v1.24.0

func GetAvailableMemory() (float64, error)

func GetHuggingFaceToken added in v1.27.9

func GetHuggingFaceToken() string

func GetSystemRAM added in v1.24.0

func GetSystemRAM() (float64, error)

func ParseBPW

func ParseBPW(bpw string) float64

ParseBPW parses the BPW value

func ParseBPWOrQuant

func ParseBPWOrQuant(input string) (float64, error)

ParseBPWOrQuant takes a string and returns a float64 BPW value

func ParseModelIdentifier added in v1.27.19

func ParseModelIdentifier(modelID string) (string, string, error)

ParseModelIdentifier parses a model identifier into its base name and quantisation level. Handles both HuggingFace (contains "/") and Ollama (contains ":" or neither) formats.

func PrintFormattedTable added in v1.24.0

func PrintFormattedTable(table QuantResultTable) string

PrintFormattedTable formats the given QuantResultTable, with descriptions, and returns the result as a string

Types

type BPWValues

// BPWValues bundles three bits-per-weight figures, all float64.
// NOTE(review): going by the field names, BPW is the general figure,
// LMHeadBPW the one for the LM head and KVCacheBPW the one for the k/v cache —
// confirm against GetBPWValues.
type BPWValues struct {
	BPW, LMHeadBPW, KVCacheBPW float64
}

BPWValues represents the bits per weight values for different components

func GetBPWValues

func GetBPWValues(bpw float64, kvCacheQuant KVCacheQuantisation) BPWValues

GetBPWValues calculates the BPW values based on the input

type ContextVRAM added in v1.24.0

// ContextVRAM holds three float64 VRAM figures side by side.
// NOTE(review): the Q8_0 and Q4_0 suffixes match the KVCacheQ8_0 and
// KVCacheQ4_0 settings, so VRAM is presumably the fp16 k/v cache figure —
// confirm; units are not visible here.
type ContextVRAM struct {
	VRAM, VRAMQ8_0, VRAMQ4_0 float64
}

type KVCacheQuantisation

// KVCacheQuantisation is a defined type whose underlying type is string.
// The values declared for it in this package are KVCacheFP16, KVCacheQ8_0 and
// KVCacheQ4_0.
type KVCacheQuantisation string

KVCacheQuantisation represents the quantisation type for the k/v context cache

// The declared KVCacheQuantisation values; each is the lower-case string
// shown.
const (
	KVCacheFP16 = KVCacheQuantisation("fp16")
	KVCacheQ8_0 = KVCacheQuantisation("q8_0")
	KVCacheQ4_0 = KVCacheQuantisation("q4_0")
)

type ModelConfig

// ModelConfig holds a model's size and architecture parameters. Each field
// maps to the JSON key named in its struct tag. NumParams is a float64; every
// other field is an int.
// NOTE(review): the JSON keys look like those of a Hugging Face config.json —
// confirm where this JSON actually comes from (see GetModelConfig).
type ModelConfig struct {
	NumParams             float64 `json:"num_params"`
	MaxPositionEmbeddings int     `json:"max_position_embeddings"`
	NumHiddenLayers       int     `json:"num_hidden_layers"`
	HiddenSize            int     `json:"hidden_size"`
	NumKeyValueHeads      int     `json:"num_key_value_heads"`
	NumAttentionHeads     int     `json:"num_attention_heads"`
	IntermediateSize      int     `json:"intermediate_size"`
	VocabSize             int     `json:"vocab_size"`
}

ModelConfig represents the configuration of a model

func GetModelConfig

func GetModelConfig(modelID string) (ModelConfig, error)

GetModelConfig retrieves and parses the model configuration

type OllamaModelInfo added in v1.26.0

// OllamaModelInfo holds the parts of an Ollama model description that this
// package uses. Its shape follows the JSON returned by Ollama's show-model
// endpoint: a "details" object with summary fields and a free-form
// "model_info" object.
//
// Details.QuantizationLevel is tagged "quantization_level" (with a "z")
// because that is how the Ollama API spells the key. With a
// "quantisation_level" tag the key never matches during decoding and the
// field is silently left empty.
//
// ModelInfo keeps the "model_info" object as loosely typed key/value pairs.
type OllamaModelInfo struct {
	Details struct {
		ParameterSize     string   `json:"parameter_size"`
		QuantizationLevel string   `json:"quantization_level"`
		Family            string   `json:"family"`
		Families          []string `json:"families"`
	} `json:"details"`
	ModelInfo map[string]interface{} `json:"model_info"`
}

func FetchOllamaModelInfo added in v1.26.0

func FetchOllamaModelInfo(apiURL, modelName string) (*OllamaModelInfo, error)

type QuantResult added in v1.24.0

// QuantResult groups the figures for a single quantisation type: QuantType is
// its name, BPW its bits per weight, and Contexts maps an int key to the
// ContextVRAM figures for that key.
// NOTE(review): the Contexts key is presumably the context size — confirm
// against GenerateQuantTable.
type QuantResult struct {
	QuantType string
	BPW       float64
	Contexts  map[int]ContextVRAM
}

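QuantResult holds the results for a single quantisation type: its name, its bits per weight, and a ContextVRAM entry for each key in Contexts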

type QuantResultTable added in v1.24.0

// QuantResultTable collects the QuantResult entries produced for one model:
// ModelID identifies the model and Results holds the entries.
// NOTE(review): FitsVRAM is presumably the VRAM budget the results are
// compared against; its units are not visible here — confirm.
type QuantResultTable struct {
	ModelID  string
	Results  []QuantResult
	FitsVRAM float64
}

QuantResultTable represents a table of VRAM estimation results

func GenerateQuantTable added in v1.24.0

func GenerateQuantTable(modelID string, fitsVRAM float64, ollamaModelInfo *OllamaModelInfo, topContext int) (QuantResultTable, error)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL
JackTT - Gopher 🇻🇳