String类概要
String是不可变的
从String的怪异现象讲起:String是否相等。==判断的是两个引用是否指向同一个对象(即对象的内存起始地址是否相同),equals判断的是自定义语义下的内容是否相同。
/**
 * Demonstrates when {@code ==} on strings yields {@code true}: compile-time constant
 * strings share a single pooled instance, whereas {@code new String(...)} always
 * creates a distinct object.
 */
public class Demo {
    public static void main(String[] args) throws Exception {
        String s = "abc";
        String s1 = "abc";
        // Constant expression: the compiler folds it into the literal "abc".
        String s2 = "a" + "bc";
        final String str1 = "a";
        final String str2 = "bc";
        // final variables initialised with literals are constants, so this is folded as well.
        String s3 = str1 + str2;
        // Explicit construction always yields a new object, never the pooled one.
        String s4 = new String("abc");
        System.out.println(s == s1);
        System.out.println(s == s2);
        System.out.println(s == s3);
        System.out.println(s == s4);
    }
}
// Output: true true true false
为什么String不可变:final修饰变量时,如果是基本类型,那么其值在运行期间不可变;如果是引用类型,那么引用所指向的对象(包括数组)的地址在运行期不可变,但是对象(数组)的内容是可以改变的。
当然只要类库设计人愿意,只要增加一个类似的setCharAt(index)的接口,String就变成可变的了
/** Backing character storage: the reference is final, but the array elements themselves are not. */
private final char value[];

/** Hash field of the string. */
private int hash; // Default to 0

/**
 * Builds the string from a private copy of {@code value}, so later changes to the
 * caller's array cannot reach this string.
 */
public String(char value[]) {
    this.value = Arrays.copyOf(value, value.length);
}
通过反射改变String
import java.lang.reflect.Field;

/**
 * Shows that String immutability is only an API-level guarantee: replacing the private
 * {@code value} array through reflection changes the contents seen through every
 * reference to the same object.
 *
 * <p>NOTE: this runs as described only on JDK 8 and earlier, where {@code value} is a
 * {@code char[]}. On later JDKs the field is a {@code byte[]} and reflective access to
 * {@code java.lang} internals is restricted, so the reflective calls throw.
 */
public class Demo {
    public static void main(String[] args) throws Exception {
        String s = "abc";
        String str = s;
        String s1 = "bbb";
        System.out.println(str == s);
        Field f = s.getClass().getDeclaredField("value");
        f.setAccessible(true);
        // Swap the backing array of the object that both s and str refer to.
        f.set(s, new char[]{'b', 'b', 'b'});
        System.out.println(str + " " + s);
        System.out.println(s == str);
        // Same contents as s1 now, but still a different object.
        System.out.println(s == s1);
    }
}
// Output (JDK 8): true / bbb bbb / true / false
String的HashCode:s的内容改变了,但是hashCode值并没有改变(hash值在第一次调用hashCode()时已被缓存);虽然s与s1的内容是相同的,但是它们的hashCode值并不相同。
import java.lang.reflect.Field;

/**
 * Shows that a String's hash code is cached: after the backing array is replaced through
 * reflection, {@code hashCode()} still returns the value computed for the old contents.
 *
 * <p>NOTE: this runs as described only on JDK 8 and earlier, where {@code value} is a
 * {@code char[]}. On later JDKs the reflective calls throw.
 */
public class Demo {
    public static void main(String[] args) throws Exception {
        String s = "abc";
        String s1 = "bbb";
        // First call computes and caches the hash of "abc".
        System.out.println(s.hashCode());
        Field f = s.getClass().getDeclaredField("value");
        f.setAccessible(true);
        f.set(s, new char[]{'b', 'b', 'b'});
        System.out.println(s + " " + s1);
        // The separator must be a String: with a char the three operands would be added as ints.
        System.out.println(s.hashCode() + " " + s1.hashCode());
    }
}
// Output (JDK 8): 96354 / bbb bbb / 96354 97314
String hashCode的源码
/**
 * Returns the hash code of this string, computed as
 * {@code h = 31 * h + val[i]} over every char and stored in {@code hash}.
 *
 * @return the stored hash when it is non-zero, otherwise the freshly computed value
 */
public int hashCode() {
    int h = hash;
    // Compute only when nothing is stored yet (hash == 0) and the string is non-empty.
    if (h == 0 && value.length > 0) {
        char val[] = value;
        for (int i = 0; i < value.length; i++) {
            h = 31 * h + val[i];
        }
        // Store the result; later calls skip the loop unless the computed value is 0.
        hash = h;
    }
    return h;
}
toString方法中的this
/** Shows how to include the default {@code Object.toString()} text without recursing. */
public class Demo {
    @Override
    public String toString() {
        // Concatenating `this` here ("address" + this) would recurse without end:
        // string concatenation converts `this` by calling toString() again.
        // Delegate to the superclass implementation instead.
        return "address" + super.toString();
    }

    public static void main(String[] args) {
        System.out.println(new Demo());
    }
}
CodePoints与CodeUnit:String的length()返回的是代码单元(code unit,即char)的个数,而不是字符(code point)的个数。
/**
 * Shows that {@code String.length()} counts UTF-16 code units, not characters: the single
 * supplementary character U+1D56B occupies two chars.
 */
public class Demo {
    public static void main(String[] args) {
        // A backslash-u escape takes exactly four hex digits, so U+1D56B cannot be written
        // as one escape (it would be read as U+1D56 followed by the letter B).
        // Build the string from the code point instead.
        String s = new String(Character.toChars(0x1D56B));
        System.out.println(s);
        // Number of code units (chars): 2
        System.out.println(s.length());
        // Number of code points (characters): 1
        System.out.println(s.codePointCount(0, s.length()));
    }
}
我们看看String是怎么处理增补字符的
public String(int[] codePoints, int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > codePoints.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } final int end = offset + count; // Pass 1: Compute precise size of char[] int n = count; for (int i = offset; i < end; i++) { int c = codePoints[i]; if (Character.isBmpCodePoint(c)) continue; else if (Character.isValidCodePoint(c)) n++; else throw new IllegalArgumentException(Integer.toString(c)); } // Pass 2: Allocate and fill in char[] final char[] v = new char[n]; for (int i = offset, j = 0; i < end; i++, j++) { int c = codePoints[i]; if (Character.isBmpCodePoint(c)) v[j] = (char)c; else Character.toSurrogates(c, v, j++); } this.value = v; } static void toSurrogates(int codePoint, char[] dst, int index) { // We write elements 'backwards' to guarantee all-or-nothing dst[index+1] = lowSurrogate(codePoint); dst[index] = highSurrogate(codePoint); }
源码解析:声明
public final class String
    implements java.io.Serializable, Comparable<String>, CharSequence {
    /** Backing character storage: the reference is final, the array elements are not. */
    private final char value[];

    private static final long serialVersionUID = -6849794470754667710L;

    /** Cache the hash code for the string */
    private int hash; // Default to 0

    /**
     * Returns the hash code, computed as {@code h = 31 * h + val[i]} over every char and
     * cached in {@code hash}.
     */
    public int hashCode() {
        int h = hash;
        // Compute only when nothing is cached yet (hash == 0) and the string is non-empty.
        if (h == 0 && value.length > 0) {
            char val[] = value;

            for (int i = 0; i < value.length; i++) {
                h = 31 * h + val[i];
            }
            hash = h;
        }
        return h;
    }
    // (excerpt: the remainder of the class is not shown here)
构造函数
/** Copy constructor: reuses the original's backing array and cached hash without copying. */
public String(String original) {
    this.value = original.value;
    this.hash = original.hash;
}

/** Copies the buffer's current contents while holding the buffer's monitor. */
public String(StringBuffer buffer) {
    synchronized(buffer) {
        this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
    }
}

/** Copies the builder's current contents (no synchronization). */
public String(StringBuilder builder) {
    this.value = Arrays.copyOf(builder.getValue(), builder.length());
}

/** Copies the whole array, so later changes to the caller's array do not affect the string. */
public String(char value[]) {
    this.value = Arrays.copyOf(value, value.length);
}

/**
 * Copies {@code count} chars of {@code value} starting at {@code offset}.
 *
 * @throws StringIndexOutOfBoundsException if {@code offset} or {@code count} is negative,
 *         or {@code offset > value.length - count}
 */
public String(char value[], int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // Note: offset or count might be near -1>>>1.
    if (offset > value.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }
    this.value = Arrays.copyOfRange(value, offset, offset+count);
}

/**
 * Decodes the given byte range with the given charset.
 *
 * @throws NullPointerException if {@code charset} is null
 */
public String(byte bytes[], int offset, int length, Charset charset) {
    if (charset == null)
        throw new NullPointerException("charset");
    checkBounds(bytes, offset, length);
    this.value = StringCoding.decode(charset, bytes, offset, length);
}

/** Decodes the given byte range via the {@code StringCoding.decode} overload that takes no charset. */
public String(byte bytes[], int offset, int length) {
    checkBounds(bytes, offset, length);
    this.value = StringCoding.decode(bytes, offset, length);
}

/**
 * Helper invoked above as {@code StringCoding.decode}: decodes with the default charset
 * and falls back to ISO-8859-1 when the default charset is reported as unsupported.
 */
static char[] decode(byte[] ba, int off, int len) {
    String csn = Charset.defaultCharset().name();
    try {
        // use charset name decode() variant which provides caching.
        return decode(csn, ba, off, len);
    } catch (UnsupportedEncodingException x) {
        warnUnsupportedCharset(csn);
    }
    try {
        return decode("ISO-8859-1", ba, off, len);
    } catch (UnsupportedEncodingException x) {
        // If this code is hit during VM initialization, MessageUtils is
        // the only way we will be able to get any kind of error message.
        MessageUtils.err("ISO-8859-1 charset not available: "
                         + x.toString());
        // If we can not find ISO-8859-1 (a required encoding) then things
        // are seriously wrong with the installation.
        System.exit(1);
        return null;
    }
}
内部构造函数:使用外部数组来初始化String内部数组。只有保证传入的数组不可能被改变,才能保证String的不可变性,例如用String初始化String对象时。
String(char[] value, boolean share) { // assert share : 'unshared not supported'; this.value = value; } public String concat(String str) { int otherLen = str.length(); if (otherLen == 0) { return this; } int len = value.length; char buf[] = Arrays.copyOf(value, len + otherLen); str.getChars(buf, len); return new String(buf, true); } // 使用了Arrays.copyof方法来构造新的数组,拷贝元素,而不是共用数组 public String substring(int beginIndex) { if (beginIndex < 0) { throw new StringIndexOutOfBoundsException(beginIndex); } int subLen = value.length - beginIndex; if (subLen < 0) { throw new StringIndexOutOfBoundsException(subLen); } return (beginIndex == 0) ? this : new String(value, beginIndex, subLen); }
如果String(value,share)可以在外部使用,就可以改变字符串内容
/**
 * Hypothetical example: the {@code String(char[], boolean)} constructor is declared
 * without an access modifier in the String source quoted in this article, so this class
 * does not compile outside {@code java.lang}. It illustrates what could happen if that
 * constructor were accessible.
 */
public class Demo {
    public static void main(String[] args) {
        char[] arr = new char[] {'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'};
        // The sharing constructor stores arr itself rather than a copy.
        String s = new String(arr,true);
        // Writing to arr would therefore change the string's contents as well.
        arr[0] = 'a';
        // Would print "aello world" if the constructor call above were permitted.
        System.out.println(s);
    }
}
在JDK 6及更早版本中,substring返回的字符串与原字符串共享value数组:aLongString 已经不用了,但是由于其与aPart共享value数组,所以不能被回收,造成内存泄漏(上文所示的substring实现会拷贝数组,不存在该问题)
/**
 * Returns a short slice of a long string. With an array-sharing substring implementation
 * the slice would keep the whole backing array of the long string reachable.
 */
public String subTest() {
    // Placeholder literal: it stands for a string of at least 40 characters. The literal as
    // written is shorter, so substring(20, 40) would throw if this were run verbatim.
    String aLongString = "...a very long string...";
    String aPart = aLongString.substring(20, 40);
    return aPart;
}
主要方法
其他主要方法:
length():返回字符串长度
isEmpty():返回字符串是否为空
charAt(int index):返回字符串中第(index+1)个字符
char[] toCharArray():转化成字符数组
trim():去掉两端的空白字符
toUpperCase():转化为大写
toLowerCase():转化为小写
String concat(String str):拼接字符串
String replace(char oldChar, char newChar):将字符串中的oldChar字符换成newChar字符
(以上两个方法都使用了String(char[] value, boolean share))
boolean matches(String regex):判断字符串是否匹配给定的regex正则表达式
boolean contains(CharSequence s):判断字符串是否包含字符序列s
String[] split(String regex, int limit):按照正则表达式regex将字符串最多分成limit份
String[] split(String regex):按照正则表达式regex分割字符串
重载的valueOf方法可以看到主要是调用构造函数或者是调用对应类型的toString完成到字符串的转换
/** Returns the literal {@code "true"} or {@code "false"}. */
public static String valueOf(boolean b) {
    return b ? "true" : "false";
}

/** Wraps the char in a fresh one-element array handed to the non-copying constructor. */
public static String valueOf(char c) {
    char data[] = {c};
    return new String(data, true);
}

/** Delegates to {@code Integer.toString(int)}. */
public static String valueOf(int i) {
    return Integer.toString(i);
}

/** Delegates to {@code Long.toString(long)}. */
public static String valueOf(long l) {
    return Long.toString(l);
}

/** Delegates to {@code Float.toString(float)}. */
public static String valueOf(float f) {
    return Float.toString(f);
}

/** Delegates to {@code Double.toString(double)}. */
public static String valueOf(double d) {
    return Double.toString(d);
}

/** Delegates to the {@code String(char[], int, int)} constructor. */
public static String valueOf(char data[], int offset, int count) {
    return new String(data, offset, count);
}

/** Same body as {@code valueOf(char[], int, int)}. */
public static String copyValueOf(char data[], int offset, int count) {
    // All public String constructors now copy the data.
    return new String(data, offset, count);
}
字符串查找算法 indexOf可以看到String的字符串匹配算法使用的是朴素的匹配算法,即前向匹配,当遇到不匹配字符时,主串从下一个字符开始,字串从开始位置开始
/**
 * Searches {@code source} for the first occurrence of {@code target} at or after
 * {@code fromIndex}, comparing character by character.
 *
 * @param source       the characters being searched
 * @param sourceOffset offset of the searched region in {@code source}
 * @param sourceCount  length of the searched region
 * @param target       the characters being searched for
 * @param targetOffset offset of the pattern in {@code target}
 * @param targetCount  length of the pattern
 * @param fromIndex    index (relative to {@code sourceOffset}) to start from; negative
 *                     values are treated as 0
 * @return the match index relative to {@code sourceOffset}, or -1 if there is no match;
 *         an empty pattern matches at {@code fromIndex} (or at {@code sourceCount} when
 *         {@code fromIndex >= sourceCount})
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
        char[] target, int targetOffset, int targetCount,
        int fromIndex) {
    if (fromIndex >= sourceCount) {
        return (targetCount == 0 ? sourceCount : -1);
    }
    if (fromIndex < 0) {
        fromIndex = 0;
    }
    if (targetCount == 0) {
        return fromIndex;
    }

    char first = target[targetOffset];
    // Last start position at which the whole pattern still fits in the region.
    int max = sourceOffset + (sourceCount - targetCount);

    for (int i = sourceOffset + fromIndex; i <= max; i++) {
        /* Look for first character. */
        if (source[i] != first) {
            while (++i <= max && source[i] != first);
        }

        /* Found first character, now look at the rest of v2 */
        if (i <= max) {
            int j = i + 1;
            int end = j + targetCount - 1;
            // Empty-bodied loop: advances j and k while the remaining chars match.
            for (int k = targetOffset + 1; j < end && source[j]
                    == target[k]; j++, k++);

            if (j == end) {
                /* Found whole string. */
                return i - sourceOffset;
            }
        }
    }
    return -1;
}
编码问题 getBytes
String s = '你好,世界!'; byte[] bytes = s.getBytes('utf-8'); public byte[] getBytes(String charsetName) throws UnsupportedEncodingException { if (charsetName == null) throw new NullPointerException(); return StringCoding.encode(charsetName, value, 0, value.length); } static byte[] encode(String charsetName, char[] ca, int off, int len) throws UnsupportedEncodingException { StringEncoder se = deref(encoder); String csn = (charsetName == null) ? 'ISO-8859-1' : charsetName; if ((se == null) || !(csn.equals(se.requestedCharsetName()) || csn.equals(se.charsetName()))) { se = null; try { Charset cs = lookupCharset(csn); if (cs != null) se = new StringEncoder(cs, csn); } catch (IllegalCharsetNameException x) {} if (se == null) throw new UnsupportedEncodingException (csn); set(encoder, se); } return se.encode(ca, off, len); }
比较方法
boolean equals(Object anObject); boolean contentEquals(StringBuffer sb); boolean contentEquals(CharSequence cs); boolean equalsIgnoreCase(String anotherString); int compareTo(String anotherString); int compareToIgnoreCase(String str); boolean regionMatches(int toffset, String other, int ooffset,int len) //局部匹配 boolean regionMatches(boolean ignoreCase, int toffset,String other, int ooffset, int len) //局部匹配 public boolean equals(Object anObject) { if (this == anObject) { return true; } if (anObject instanceof String) { String anotherString = (String) anObject; int n = value.length; if (n == anotherString.value.length) { char v1[] = value; char v2[] = anotherString.value; int i = 0; while (n-- != 0) { if (v1[i] != v2[i]) return false; i++; } return true; } } return false; }
替换函数 replace
/**
 * Replaces every occurrence of {@code target} with {@code replacement}. The target is
 * compiled with {@code Pattern.LITERAL} and the replacement is passed through
 * {@code Matcher.quoteReplacement} before {@code replaceAll} is applied.
 */
public String replace(CharSequence target, CharSequence replacement) {
    return Pattern.compile(target.toString(), Pattern.LITERAL).matcher(
            this).replaceAll(Matcher.quoteReplacement(replacement.toString()));
}

/** Compiles {@code regex} and delegates to {@code Matcher.replaceFirst(replacement)}. */
public String replaceFirst(String regex, String replacement) {
    return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
}

/** Compiles {@code regex} and delegates to {@code Matcher.replaceAll(replacement)}. */
public String replaceAll(String regex, String replacement) {
    return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}

/**
 * Returns a string in which every {@code oldChar} is replaced by {@code newChar}.
 * Returns this very string when the two chars are equal or {@code oldChar} does not occur.
 */
public String replace(char oldChar, char newChar) {
    if (oldChar != newChar) {
        int len = value.length;
        int i = -1;
        char[] val = value; /* avoid getfield opcode */

        // Find the first occurrence; i == len afterwards means there is none.
        while (++i < len) {
            if (val[i] == oldChar) {
                break;
            }
        }
        if (i < len) {
            char buf[] = new char[len];
            // Chars before the first occurrence are copied unchanged.
            for (int j = 0; j < i; j++) {
                buf[j] = val[j];
            }
            // From the first occurrence on, substitute while copying.
            while (i < len) {
                char c = val[i];
                buf[i] = (c == oldChar) ? newChar : c;
                i++;
            }
            // buf is a fresh local array, handed to the non-copying constructor.
            return new String(buf, true);
        }
    }
    return this;
}
常量池相关方法
/**
 * Declared {@code native}: no Java body is given, the implementation lives outside the
 * Java source. This article lists it as the constant-pool related method of String.
 */
public native String intern();
运算符的重载
// int转String的方法比较public class Demo { public static void main(String[] args) throws Exception { int i = 5; String i1 = '' + i; String i2 = String.valueOf(i); String i3 = Integer.toString(i); }} // 原始代码public class Demo { public static void main(String[] args) throws Exception { String string='hollis'; String string2 = string + 'chuang'; }} //反编译代码public class Demo { public static void main(String[] args) throws Exception { String string = 'hollis'; String string2 = (new StringBuilder(String.valueOf(string))).append('chuang').toString(); }}
|
|