...

Source file src/unicode/graphic.go

Documentation: unicode

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package unicode
		 6  
		 // Property bits recorded for each code point under U+0100, for fast lookup.
		 // A code point's entry is the OR of every bit that applies to it.
		 const (
		 	pC  = 1 << 0 // control character
		 	pP  = 1 << 1 // punctuation character
		 	pN  = 1 << 2 // numeral
		 	pS  = 1 << 3 // symbolic character
		 	pZ  = 1 << 4 // spacing character
		 	pLu = 1 << 5 // upper-case letter
		 	pLl = 1 << 6 // lower-case letter
		 	pp  = 1 << 7 // printable character, by Go's definition

		 	// Composite masks built from the single bits above.
		 	pg     = pp | pZ   // graphical character, by the Unicode definition
		 	pLo    = pLl | pLu // letter that is neither upper nor lower case
		 	pLmask = pLo       // mask covering both letter bits
		 )
		21  
		22  // GraphicRanges defines the set of graphic characters according to Unicode.
		23  var GraphicRanges = []*RangeTable{
		24  	L, M, N, P, S, Zs,
		25  }
		26  
		27  // PrintRanges defines the set of printable characters according to Go.
		28  // ASCII space, U+0020, is handled separately.
		29  var PrintRanges = []*RangeTable{
		30  	L, M, N, P, S,
		31  }
		32  
		33  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
		34  // Such characters include letters, marks, numbers, punctuation, symbols, and
		35  // spaces, from categories L, M, N, P, S, Zs.
		36  func IsGraphic(r rune) bool {
		37  	// We convert to uint32 to avoid the extra test for negative,
		38  	// and in the index we convert to uint8 to avoid the range check.
		39  	if uint32(r) <= MaxLatin1 {
		40  		return properties[uint8(r)]&pg != 0
		41  	}
		42  	return In(r, GraphicRanges...)
		43  }
		44  
		45  // IsPrint reports whether the rune is defined as printable by Go. Such
		46  // characters include letters, marks, numbers, punctuation, symbols, and the
		47  // ASCII space character, from categories L, M, N, P, S and the ASCII space
		48  // character. This categorization is the same as IsGraphic except that the
		49  // only spacing character is ASCII space, U+0020.
		50  func IsPrint(r rune) bool {
		51  	if uint32(r) <= MaxLatin1 {
		52  		return properties[uint8(r)]&pp != 0
		53  	}
		54  	return In(r, PrintRanges...)
		55  }
		56  
		57  // IsOneOf reports whether the rune is a member of one of the ranges.
		58  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
		59  func IsOneOf(ranges []*RangeTable, r rune) bool {
		60  	for _, inside := range ranges {
		61  		if Is(inside, r) {
		62  			return true
		63  		}
		64  	}
		65  	return false
		66  }
		67  
		68  // In reports whether the rune is a member of one of the ranges.
		69  func In(r rune, ranges ...*RangeTable) bool {
		70  	for _, inside := range ranges {
		71  		if Is(inside, r) {
		72  			return true
		73  		}
		74  	}
		75  	return false
		76  }
		77  
		78  // IsControl reports whether the rune is a control character.
		79  // The C (Other) Unicode category includes more code points
		80  // such as surrogates; use Is(C, r) to test for them.
		81  func IsControl(r rune) bool {
		82  	if uint32(r) <= MaxLatin1 {
		83  		return properties[uint8(r)]&pC != 0
		84  	}
		85  	// All control characters are < MaxLatin1.
		86  	return false
		87  }
		88  
		89  // IsLetter reports whether the rune is a letter (category L).
		90  func IsLetter(r rune) bool {
		91  	if uint32(r) <= MaxLatin1 {
		92  		return properties[uint8(r)]&(pLmask) != 0
		93  	}
		94  	return isExcludingLatin(Letter, r)
		95  }
		96  
		97  // IsMark reports whether the rune is a mark character (category M).
		98  func IsMark(r rune) bool {
		99  	// There are no mark characters in Latin-1.
	 100  	return isExcludingLatin(Mark, r)
	 101  }
	 102  
	 103  // IsNumber reports whether the rune is a number (category N).
	 104  func IsNumber(r rune) bool {
	 105  	if uint32(r) <= MaxLatin1 {
	 106  		return properties[uint8(r)]&pN != 0
	 107  	}
	 108  	return isExcludingLatin(Number, r)
	 109  }
	 110  
	 111  // IsPunct reports whether the rune is a Unicode punctuation character
	 112  // (category P).
	 113  func IsPunct(r rune) bool {
	 114  	if uint32(r) <= MaxLatin1 {
	 115  		return properties[uint8(r)]&pP != 0
	 116  	}
	 117  	return Is(Punct, r)
	 118  }
	 119  
	 120  // IsSpace reports whether the rune is a space character as defined
	 121  // by Unicode's White Space property; in the Latin-1 space
	 122  // this is
	 123  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
	 124  // Other definitions of spacing characters are set by category
	 125  // Z and property Pattern_White_Space.
	 126  func IsSpace(r rune) bool {
	 127  	// This property isn't the same as Z; special-case it.
	 128  	if uint32(r) <= MaxLatin1 {
	 129  		switch r {
	 130  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
	 131  			return true
	 132  		}
	 133  		return false
	 134  	}
	 135  	return isExcludingLatin(White_Space, r)
	 136  }
	 137  
	 138  // IsSymbol reports whether the rune is a symbolic character.
	 139  func IsSymbol(r rune) bool {
	 140  	if uint32(r) <= MaxLatin1 {
	 141  		return properties[uint8(r)]&pS != 0
	 142  	}
	 143  	return isExcludingLatin(Symbol, r)
	 144  }
	 145  

View as plain text