mirror of
https://github.com/documize/community.git
synced 2025-07-19 05:09:42 +02:00
151 lines
4.8 KiB
Go
151 lines
4.8 KiB
Go
//go:build !386 && !arm && !mips && !mipsle
|
|
// +build !386,!arm,!mips,!mipsle
|
|
|
|
package mssql
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"unicode/utf16"
|
|
"unsafe"
|
|
)
|
|
|
|
func ucs22str(s []byte) (string, error) {
|
|
if len(s)%2 != 0 {
|
|
return "", fmt.Errorf("illegal UCS2 string length: %d", len(s))
|
|
}
|
|
|
|
// allocate a buffer which we will attempt to copy ascii into, optimistically, as we validate
|
|
buf := make([]byte, len(s)/2)
|
|
useFastPath := true
|
|
|
|
// how many 8 byte chunks are in the input buffer
|
|
nlen8 := len(s) & 0xFFFFFFF8
|
|
// our read and write offsets into the buffers
|
|
var (
|
|
readIndex int
|
|
writeIndex int
|
|
)
|
|
|
|
// step through in 8 byte chunks.
|
|
for readIndex = 0; readIndex < nlen8; readIndex += 8 {
|
|
|
|
// dereference directly into the array as uint64s
|
|
ui64 := *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))
|
|
|
|
// mask the entire 64 bit region and check for
|
|
// 1) even bytes > 0
|
|
// 2) odd bytes with their high bit set
|
|
// the mask for this is FF80....
|
|
if ui64&mask64 > 0 {
|
|
// if we find a value once masked, we have to take the slow path as this is not an ascii string
|
|
useFastPath = false
|
|
break
|
|
}
|
|
|
|
// we are ok to read out the 4 odd bytes and remove the empty even bytes
|
|
var ui32 uint32 = 0
|
|
ui32 |= uint32(byte(ui64))
|
|
ui64 = ui64 >> 8
|
|
|
|
ui32 |= uint32(uint16(ui64))
|
|
ui64 = ui64 >> 8
|
|
|
|
ui32 |= uint32(ui64 & 0xFF0000)
|
|
ui64 = ui64 >> 8
|
|
ui32 |= uint32(ui64 & 0xFF000000)
|
|
|
|
// write the new 32 bit value to the destination buffer
|
|
ptrui32 := ((*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[0])) + uintptr(writeIndex))))
|
|
*ptrui32 = ui32
|
|
|
|
// step forward four bytes in the destinaiton buffer
|
|
writeIndex += 4
|
|
}
|
|
|
|
// can we continue reading on the fast ascii path?
|
|
if useFastPath {
|
|
// we have now dealt with all the avalable 8 byte chunks, we have at most 7 bytes remaining.
|
|
|
|
// have we got at least 4 bytes remaining to be read?
|
|
if len(s)-readIndex >= 4 {
|
|
// deal with the next 32 bit region
|
|
|
|
// read 32 bits from the current read position in the source slice
|
|
ui32 := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))
|
|
|
|
// mask the 32 bit value as above. again, if we find a value
|
|
// this is not ascii and we need to fall back to the slow path
|
|
// this time with a 32 bit mask
|
|
if ui32&mask32 > 0 {
|
|
// we have found non ascii text and must fallback
|
|
useFastPath = false
|
|
} else {
|
|
|
|
// read the two odd positions bytes and write as a single 16 bit value
|
|
var ui16 uint16 = 0
|
|
ui16 |= uint16(byte(ui32))
|
|
ui32 = ui32 >> 8
|
|
|
|
ui16 |= uint16(ui32)
|
|
|
|
ptrui16 := ((*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[0])) + uintptr((writeIndex)))))
|
|
*ptrui16 = ui16
|
|
|
|
// step forward the read and write positions.
|
|
readIndex += 4
|
|
writeIndex += 2
|
|
}
|
|
}
|
|
|
|
// Are we still on the fast path?
|
|
if useFastPath {
|
|
// have we got at least 2 bytes remaining to be read?
|
|
// actually we can only have at most 2 bytes at this point
|
|
// since we know the source buffer has even length.
|
|
if len(s)-readIndex >= 2 {
|
|
|
|
// read 2 bytes
|
|
ui16 := *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&s[0])) + uintptr(readIndex)))
|
|
|
|
// mask again, but only 16bits
|
|
if ui16&mask16 == 0 {
|
|
// manually pull out the low byte and write to our destination buffer
|
|
buf[writeIndex] = byte(ui16 & 0xFF)
|
|
// we have now successfully read the entire ascii buffer and can convert to a string
|
|
return *(*string)(unsafe.Pointer(&buf)), nil
|
|
}
|
|
} else {
|
|
// there were no further bytes to read, but we have successfully read the ascii
|
|
// and can convert to a string
|
|
return *(*string)(unsafe.Pointer(&buf)), nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// one of the above checks has found non ascii values in the buffer, either
|
|
// a high bit set in an odd byte or any non zero in an even byte.
|
|
// we fall back to a slower conversion here.
|
|
|
|
// we can reuse the underlying array and create our own uint16 slice here
|
|
// because utf16.Decode allocates a new buffer and only reads its input.
|
|
|
|
// declare a real uint16 slice so that the compiler can keep track of
|
|
// the underlying memory as we transfer & convert it.
|
|
// This is to ensure that the GC does not prematurely collect our data.
|
|
var uint16slice []uint16
|
|
|
|
uint16Header := (*reflect.SliceHeader)(unsafe.Pointer(&uint16slice))
|
|
sourceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&s))
|
|
|
|
uint16Header.Data = sourceHeader.Data
|
|
// it is important to reference s after the assignment of the Data
|
|
// pointer to make sure that s is not garbage collected before
|
|
// we have another reference to the data.
|
|
uint16Header.Len = len(s) / 2 // the output is half the length in bytes
|
|
uint16Header.Cap = uint16Header.Len // the capacity is also half the number of bytes
|
|
|
|
// decode the uint16s as utf-16 and return a string.
|
|
// After this point both s and uint16slice can be garbage collected.
|
|
return string(utf16.Decode(uint16slice)), nil
|
|
}
|