1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-19 05:09:42 +02:00
documize/vendor/github.com/microsoft/go-mssqldb/ucs22str.go
2024-01-10 14:47:40 -05:00

151 lines
4.8 KiB
Go

//go:build !386 && !arm && !mips && !mipsle
// +build !386,!arm,!mips,!mipsle
package mssql
import (
"fmt"
"reflect"
"unicode/utf16"
"unsafe"
)
// ucs22str decodes s, interpreted as a sequence of 16-bit code units, into a
// Go string.
//
// It returns an error if len(s) is odd, since the input cannot then be split
// into whole 16-bit units.
//
// Two strategies are used:
//
//   - Fast path: the input is scanned 8, then 4, then 2 bytes at a time. As
//     long as every unit passes the mask test (per the masks declared elsewhere
//     in this package: high byte zero and top bit of the low byte clear, i.e.
//     7-bit ASCII), the low byte of each unit is copied into a half-sized
//     buffer which is then returned as a string without a further copy.
//   - Slow path: as soon as any unit fails the mask test, the bytes of s are
//     reinterpreted as a []uint16 and handed to utf16.Decode.
//
// The multi-byte loads go through unsafe pointers and read the units in the
// host's native byte order; the byte extraction in the fast path assumes the
// low byte of each unit comes first in memory (little-endian).
func ucs22str(s []byte) (string, error) {
	if len(s)%2 != 0 {
		return "", fmt.Errorf("illegal UCS2 string length: %d", len(s))
	}

	// Destination for the optimistic ASCII copy: one output byte per
	// two input bytes.
	buf := make([]byte, len(s)/2)
	useFastPath := true

	// Number of input bytes that can be consumed in whole 8-byte chunks.
	// Bit-clear (&^ 7) is used instead of a 32-bit mask so that lengths of
	// 4 GiB or more keep their high bits and are not silently truncated.
	nlen8 := len(s) &^ 7

	// Read offset into s and write offset into buf.
	var (
		readIndex  int
		writeIndex int
	)

	// Step through the input in 8-byte chunks (four 16-bit units each).
	for readIndex = 0; readIndex < nlen8; readIndex += 8 {
		// Load 8 input bytes as a single uint64.
		ui64 := *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))

		// A non-zero result after masking means at least one unit has a
		// non-zero high byte or a low byte with its top bit set, so the
		// data is not plain ASCII and the slow path must be taken.
		if ui64&mask64 > 0 {
			useFastPath = false
			break
		}

		// Pack the four low bytes (at bit offsets 0, 16, 32 and 48 of ui64)
		// into consecutive bytes of a uint32, dropping the zero high bytes.
		var ui32 uint32
		ui32 |= uint32(byte(ui64))
		ui64 = ui64 >> 8
		ui32 |= uint32(uint16(ui64))
		ui64 = ui64 >> 8
		ui32 |= uint32(ui64 & 0xFF0000)
		ui64 = ui64 >> 8
		ui32 |= uint32(ui64 & 0xFF000000)

		// Store the four packed bytes into the destination buffer.
		ptrui32 := ((*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[0])) + uintptr(writeIndex))))
		*ptrui32 = ui32

		// Step forward four bytes in the destination buffer.
		writeIndex += 4
	}

	// Can we continue reading on the fast ASCII path?
	if useFastPath {
		// All available 8-byte chunks are done; at most 6 bytes remain
		// (the length is even). Handle a 4-byte remainder first.
		if len(s)-readIndex >= 4 {
			// Load the next 4 input bytes as a single uint32.
			ui32 := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))

			// Same validation as above, with the 32-bit mask.
			if ui32&mask32 > 0 {
				// Non-ASCII data found: fall back to the slow path.
				useFastPath = false
			} else {
				// Pack the two low bytes into a uint16 and store it.
				var ui16 uint16
				ui16 |= uint16(byte(ui32))
				ui32 = ui32 >> 8
				ui16 |= uint16(ui32)
				ptrui16 := ((*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[0])) + uintptr((writeIndex)))))
				*ptrui16 = ui16

				// Step forward the read and write positions.
				readIndex += 4
				writeIndex += 2
			}
		}

		// Are we still on the fast path?
		if useFastPath {
			// At most 2 bytes can remain here, because the source length
			// is even and any 4-byte remainder was consumed above.
			if len(s)-readIndex >= 2 {
				// Load the final 16-bit unit.
				ui16 := *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))

				// Validate with the 16-bit mask.
				if ui16&mask16 == 0 {
					// Copy the low byte; the whole input was ASCII, so buf
					// can be reinterpreted as a string without copying.
					buf[writeIndex] = byte(ui16 & 0xFF)
					return *(*string)(unsafe.Pointer(&buf)), nil
				}
				// Otherwise the last unit is not ASCII: fall through to
				// the slow path below.
			} else {
				// Nothing left to read and everything seen was ASCII.
				return *(*string)(unsafe.Pointer(&buf)), nil
			}
		}
	}

	// One of the checks above found non-ASCII data: either a high bit set in
	// a low byte or a non-zero high byte. Fall back to a full UTF-16 decode.
	//
	// The backing array of s is reused as a []uint16 rather than copied,
	// because utf16.Decode allocates a new buffer and only reads its input.
	// A real uint16 slice variable is declared so that the compiler can keep
	// track of the underlying memory while the header is rewritten, ensuring
	// the GC does not prematurely collect the data.
	var uint16slice []uint16
	uint16Header := (*reflect.SliceHeader)(unsafe.Pointer(&uint16slice))
	sourceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&s))
	uint16Header.Data = sourceHeader.Data

	// It is important to reference s after the assignment of the Data
	// pointer, so that s is not garbage collected before a second reference
	// to the data exists.
	uint16Header.Len = len(s) / 2       // one uint16 per two input bytes
	uint16Header.Cap = uint16Header.Len // capacity matches the length

	// Decode the uint16s as UTF-16 and return a string.
	// After this point both s and uint16slice can be garbage collected.
	return string(utf16.Decode(uint16slice)), nil
}