/content/dam/sonystyle/products/color/([^\/]*)[$|/].*$
# Disable caching for responses whose file name ends in .html, .htm, .js or .css.
<filesMatch "\.(html|htm|js|css)$">
# Do not generate an ETag for the matched files.
FileETag None
# The Header directives below are applied only when mod_headers is loaded.
<ifModule mod_headers.c>
# Remove any ETag header that is still present on the response.
Header unset ETag
# Tell HTTP/1.1 clients and proxies not to store or reuse the response.
Header set Cache-Control "max-age=0, no-cache, no-store, must-revalidate"
# HTTP/1.0 equivalent of "no-cache".
Header set Pragma "no-cache"
# A date in the past, so the response is always treated as already expired.
Header set Expires "Wed, 11 Jan 1984 05:00:00 GMT"
</ifModule>
</filesMatch>
轉自 Disable caching of assets in Apache
http://bertanguven.com/disabl...
csdn上面的,直接搬了過來:
因為要做觀點,觀點的屋子類似于知乎的話題,所以得想辦法把它給爬下來,搞了半天最終還是妥妥地搞定了,代碼是python寫的,不懂的麻煩自學哈!懂的直接看代碼,絕對可用
#coding:utf-8
"""
@author:haoning
@create time:2015.8.5
"""
from __future__ import division # 精確除法
from Queue import Queue
from __builtin__ import False
import json
import os
import re
import platform
import uuid
import urllib
import urllib2
import sys
import time
import MySQLdb as mdb
from bs4 import BeautifulSoup
reload(sys)
sys.setdefaultencoding( "utf-8" )
# HTTP headers sent with the AJAX POST issued by get_topis().
# NOTE(review): the Cookie value below is a hard-coded session captured from a
# browser; it looks like it should come from configuration instead and will
# presumably expire - confirm before relying on it.
headers = {
'User-Agent' : 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0',
'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With':'XMLHttpRequest',
'Referer':'https://www.zhihu.com/topics',
'Cookie':'__utma=51854390.517069884.1416212035.1416212035.1416212035.1; q_c1=c02bf44d00d240798bfabcfc95baeb56|1455778173000|1416205243000; _za=b1c8ae35-f986-46a2-b24a-cb9359dc6b2a; aliyungf_tc=AQAAAJ1m71jL1woArKqF22VFnL/wRy6C; _xsrf=9d494558f9271340ab24598d85b2a3c8; cap_id="MDNiMjcwM2U0MTRhNDVmYjgxZWVhOWI0NTA2OGU5OTg=|1455864276|2a4ce8247ebd3c0df5393bb5661713ad9eec01dd"; n_c=1; _alicdn_sec=56c6ba4d556557d27a0f8c876f563d12a285f33a'
}
# MySQL connection settings.
# NOTE(review): credentials are hard-coded in the source.
DB_HOST = '127.0.0.1'
DB_USER = 'root'
DB_PASS = 'root'
queue= Queue() # receive queue: (token, topic name, parent name) tuples awaiting a crawl
# The names nodeSet, keywordSet, stop, offset, level, maxLevel and base are not
# read by any function visible in this file (get_topis uses its own local
# `offset`; nodeSet only appears in a `global` statement).
nodeSet=set()
keywordSet=set()
stop=0
offset=-20
level=0
maxLevel=7
# Number of rows inserted into `rooms` so far; incremented by getContent().
counter=0
base=""
# The database connection is opened at import time and shared by every function.
conn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'zhihu', charset='utf8')
# Autocommit is off, so the functions call conn.commit() explicitly.
conn.autocommit(False)
curr = conn.cursor()
def get_html(url):
    """Fetch *url* and return the raw response body, or None on any failure.

    The request uses a 3-second timeout.  Errors are deliberately swallowed
    (best-effort crawl), but only ``Exception`` subclasses are caught so that
    KeyboardInterrupt / SystemExit still stop the program.
    """
    try:
        req = urllib2.Request(url)
        # A proxy should be plugged in here.
        response = urllib2.urlopen(req, None, 3)
        try:
            return response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
    except Exception:
        return None
def getTopics():
    """Download the Zhihu topics page and store unseen categories.

    Every ``<li class="zm-topic-cat-item">`` found on the page is inserted
    into the ``classify_new`` table (columns ``data_id``, ``name``) unless a
    row with the same name already exists.  Any error is printed and
    swallowed; nothing is returned.
    """
    url = 'https://www.zhihu.com/topics'
    print(url)
    try:
        req = urllib2.Request(url)
        # A proxy should be plugged in here.
        response = urllib2.urlopen(req)
        html = response.read().decode('utf-8')
        print(html)
        soup = BeautifulSoup(html)
        lis = soup.find_all('li', {'class': 'zm-topic-cat-item'})
        for li in lis:
            data_id = li.get('data-id')
            name = li.text
            # Query parameters must be a sequence: (name,) - not (name),
            # which is just a parenthesised string.
            curr.execute('select id from classify_new where name=%s', (name,))
            if not curr.fetchone():
                curr.execute(
                    'INSERT INTO classify_new(data_id,name)VALUES(%s,%s)',
                    (data_id, name))
                conn.commit()
    except Exception as e:
        print("get topic error %s" % (e,))
def get_extension(name):
    """Return the part of *name* from its last '.' onward (dot included).

    Returns None when *name* contains no '.' at all.
    """
    where = name.rfind('.')
    if where == -1:
        return None
    return name[where:]
def which_platform():
    """Return the operating-system name reported by ``platform.system()``."""
    return platform.system()
def GetDateString():
    """Return today's local date formatted as ``YYYY-MM-DD``."""
    # strftime() without a time tuple formats the current local time.
    return time.strftime('%Y-%m-%d')
def makeDateFolder(par, classify):
    """Ensure ``<par>/<today>/<classify>`` exists and return its path.

    Returns None when *par* is not an existing directory or when creating the
    folder fails (the error is printed).  On Linux the parts are joined with
    '/', on every other platform with '//'.
    """
    try:
        if not os.path.isdir(par):
            return None
        sep = '/' if which_platform() == "Linux" else '//'
        newFolderName = par + sep + GetDateString() + sep + str(classify)
        if not os.path.isdir(newFolderName):
            os.makedirs(newFolderName)
        return newFolderName
    except Exception as e:
        print("kk %s" % (e,))
    return None
def download_img(url, classify):
    """Download the image at *url* into today's folder for *classify*.

    Returns:
        * a site-relative path ``/room/default/<date>/<classify>/<file>`` when
          the image was written to disk;
        * ``True`` when *url* contains ``"e82bab09c_m"`` - presumably the
          site's default placeholder avatar (TODO confirm) - in which case
          nothing is stored;
        * ``None`` when *url* has no extension, the download or the write
          fails, or the target folder cannot be created.  Callers then fall
          back to not storing the topic.
    """
    try:
        extention = get_extension(url)
        if extention is None:
            return None
        req = urllib2.Request(url)
        resp = urllib2.urlopen(req, None, 3)
        try:
            dataimg = resp.read()
        finally:
            resp.close()
        name = str(uuid.uuid1()).replace("-", "") + "_www.guandn.com" + extention
        top = "E://topic_pic"
        folder = makeDateFolder(top, classify)
        if "e82bab09c_m" in str(url):
            return True
        if folder is None:
            # No place to store the file (base directory missing).
            return None
        # The original joined the parts with the literal "http://", which can
        # never be a valid file name; build a real path instead.
        filename = os.path.join(folder, name)
        try:
            if os.path.exists(filename):
                print("file exist")
                return None
            with open(filename, 'wb') as file_object:
                file_object.write(dataimg)
            return '/room/default/' + GetDateString() + '/' + str(classify) + "/" + name
        except IOError as e1:
            print("e1= %s" % (e1,))
    except Exception as e:
        print("eee %s" % (e,))
    # Nothing was downloaded.
    return None
def getChildren(node, name):
    """Queue the child topics listed on the page of topic *node*.

    The topic's "hot" page is fetched; when it has a ``div.child-topic``
    section whose text contains the child-topic marker, every
    ``a.zm-item-tag`` link whose text is not yet a row in ``rooms`` is put on
    the global ``queue`` as ``(data-token, link text, this topic's title)``.

    *name* is accepted for call compatibility but is not used.
    Errors are printed and swallowed; nothing is returned.
    """
    try:
        url = "https://www.zhihu.com/topic/" + str(node) + "/hot"
        html = get_html(url)
        if html is None:
            return
        soup = BeautifulSoup(html)
        node_name = soup.find('div', {'id': 'zh-topic-title'}).find('h1').text
        topic_cla = soup.find('div', {'class': 'child-topic'})
        if topic_cla is None:
            return
        try:
            p_ch = str(topic_cla.text)
            # NOTE(review): this marker is written in traditional characters;
            # confirm it matches the text the site actually serves.
            if u'子話題' not in p_ch:
                return
            # Collect every child-topic link.
            for a in soup.find_all('a', {'class': 'zm-item-tag'}):
                token = a.get('data-token')
                markup = str(a).replace('\n', '').replace('\t', '').replace('\r', '')
                # Keep what lies between the opening tag's '>' and '</a>'.
                start = markup.find('>')
                end = markup.rfind('</a>')
                new_node = markup[start + 1:end]
                # Make sure the name is not stored yet.
                curr.execute('select id from rooms where name=%s', (new_node,))
                y = curr.fetchone()
                if not y:
                    print("y= %s new_node= %s token= %s" % (y, new_node, token))
                    queue.put((token, new_node, node_name))
        except Exception as e:
            print("add queue error %s" % (e,))
    except Exception as e:
        print("get html error %s" % (e,))
def getContent(n, name, p, top_id):
    """Fetch topic *n* and insert it into ``rooms`` if it is not there yet.

    Args:
        n: topic token used to build the page URL.
        name: topic name used for the initial "already stored?" check.
        p: name of the parent topic; its ``rooms.id`` becomes ``father_id``
            (2 is used when no such row exists).
        top_id: category id, passed to download_img() as the folder name.

    The row is only inserted when download_img() returned a path or ``True``.
    Errors are printed and swallowed; nothing is returned.
    """
    global counter
    try:
        # Make sure the name is not stored yet.
        curr.execute('select id from rooms where name=%s', (name,))
        y = curr.fetchone()
        print("exist??  %s n= %s" % (y, n))
        if y:
            return
        url = "https://www.zhihu.com/topic/" + str(n) + "/hot"
        html = get_html(url)
        if html is None:
            return
        soup = BeautifulSoup(html)
        title = soup.find('div', {'id': 'zh-topic-title'}).find('h1').text
        pic_path = soup.find('a', {'id': 'zh-avartar-edit-form'}).find('img').get('src')
        description = soup.find('div', {'class': 'zm-editable-content'})
        if description is not None:
            description = description.text
        # These topics are still stored (to avoid an endless loop), but
        # without a description.
        # NOTE(review): the markers are written in traditional characters;
        # confirm they match the text the site actually serves.
        if u"未歸類" in title or u"根話題" in title:
            description = None
        tag_path = download_img(pic_path, top_id)
        print("tag_path= %s" % (tag_path,))
        if tag_path is None:
            return
        if tag_path is True:
            # download_img() signalled "store the topic without an avatar".
            tag_path = None
        father_id = 2  # default parent
        curr.execute('select id from rooms where name=%s', (p,))
        rows = curr.fetchall()
        if rows:
            # With several matches the last row wins, as before.
            father_id = rows[-1][0]
        name = title
        # Check again under the title actually shown on the page.
        curr.execute('select id from rooms where name=%s', (name,))
        y = curr.fetchone()
        print("store see.. %s" % (y,))
        if y:
            return
        friends_num = 0
        create_time = time.strftime("%Y-%m-%d %H:%M:%S")  # current local time
        creater_id = None
        room_avatar = tag_path
        is_pass = 1
        has_index = 0
        reason_id = None
        counter = counter + 1
        curr.execute(
            "INSERT INTO rooms(father_id,name,friends_num,description,create_time,creater_id,room_avatar,is_pass,has_index,reason_id)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
            (father_id, name, friends_num, description, create_time,
             creater_id, room_avatar, is_pass, has_index, reason_id))
        # Commit at once, otherwise children cannot find their parent row.
        conn.commit()
        if counter % 200 == 0:
            print("current node %s num %s" % (name, counter))
    except Exception as e:
        print("get content error %s" % (e,))
def work():
    """Crawl every active row of ``classify`` breadth-first.

    Each row with ``status=1`` seeds the global ``queue`` with
    ``(node, name, parent)``; the queue is then drained, storing each topic
    with getContent() and enqueuing its children with getChildren().
    Errors for one row are printed and do not stop the remaining rows.
    """
    curr.execute('select id,node,parent,name from classify where status=1')
    results = curr.fetchall()
    for r in results:
        top_id = r[0]
        node = r[1]
        parent = r[2]
        name = r[3]
        try:
            # Seed the queue with the category itself.
            queue.put((node, name, parent))
            while queue.qsize() > 0:
                # Queue items are 3-tuples (token, name, parent); the original
                # unpacked only two values and called getContent() with three
                # arguments, so every row failed.
                n, node_name, p = queue.get()
                getContent(n, node_name, p, top_id)
                getChildren(n, node_name)  # enqueue this node's children
            conn.commit()
        except Exception as e:
            print("what's wrong %s" % (e,))
def new_work():
    """Run get_topis() for every active row of ``classify_new_copy``.

    Rows are selected with ``status=1``.  A failure for one row is reported
    and the remaining rows are still processed.
    """
    curr.execute('select id,data_id,name from classify_new_copy where status=1')
    results = curr.fetchall()
    for r in results:
        top_id = r[0]
        data_id = r[1]
        name = r[2]
        try:
            get_topis(data_id, name, top_id)
        except Exception as e:
            # Previously a bare "except: pass" hid every failure, including
            # Ctrl-C; keep going, but say what went wrong.
            print("new_work error %s" % (e,))
def get_topis(data_id, name, top_id):
    """Page through the topics of category *data_id* and crawl each of them.

    Args:
        data_id: category id sent as ``topic_id`` to the listing endpoint.
        name: category name, used as the parent of every seed topic.
        top_id: category row id, forwarded to getContent().

    The listing endpoint is polled with offsets 0, 20, 40, ... until it
    returns fewer than 5 entries.  For each ``div.blk`` found, the linked
    topic is queued and the queue is drained with getContent() /
    getChildren().  A failed request skips that page; after three failed
    requests in a row paging stops instead of looping forever.
    """
    url = 'https://www.zhihu.com/node/TopicsPlazzaListV2'
    max_failures = 3
    failures = 0
    offset = -20
    data_id = str(data_id)
    while True:
        offset = offset + 20
        values = {'method': 'next', 'params': '{"topic_id":'+data_id+',"offset":'+str(offset)+',"hash_id":""}'}
        try:
            try:
                data = urllib.urlencode(values)
                request = urllib2.Request(url, data, headers)
                response = urllib2.urlopen(request, None, 5)
                try:
                    html = response.read().decode('utf-8')
                finally:
                    response.close()
                ms = json.loads(html)['msg']
            except Exception as e:
                print("eeeee %s" % (e,))
                failures += 1
                if failures >= max_failures:
                    break
                continue
            failures = 0
            if len(ms) < 5:
                break
            # NOTE(review): only the first entry of the page is parsed, as in
            # the original; confirm the remaining entries are not needed.
            msg = ms[0]
            if msg is None:
                continue
            soup = BeautifulSoup(str(msg))
            blks = soup.find_all('div', {'class': 'blk'})
            for blk in blks:
                page = blk.find('a').get('href')
                if page is None:
                    continue
                node = page.replace("/topic/", "")  # one more seed topic
                ne = blk.find('strong').text
                try:
                    # The category name stays the parent of every seed; the
                    # original overwrote `name` while draining the queue.
                    queue.put((node, ne, name))
                    while queue.qsize() > 0:
                        n, node_name, p = queue.get()
                        size = queue.qsize()
                        if size > 0:
                            print(size)
                        getContent(n, node_name, p, top_id)
                        getChildren(n, node_name)  # enqueue this node's children
                    conn.commit()
                except Exception as e:
                    print("what's wrong %s" % (e,))
        except urllib2.URLError as e:
            print("error is %s" % (e,))
if __name__ == '__main__':
    # Repeat the whole crawl 400 times; topics already stored are skipped by
    # getContent(), so later passes only pick up what earlier ones missed.
    for _ in range(400):
        new_work()
說下數據庫的問題,我這里就不傳附件了,看字段自己建立,因為這確實太簡單了,我是用的mysql,你看自己的需求自己建。
有什么不懂的麻煩去轉盤網找我,因為這個也是我開發的,上面會及時更新qq群號,這里不留qq號啥的,以免被系統給K了。
php artisan make:controller --plain
附帶身份憑證的請求與通配符:對于附帶身份憑證的請求,服務器不得設置 Access-Control-Allow-Origin 的值為“*”。
這是因為請求的首部中攜帶了 Cookie 信息,如果 Access-Control-Allow-Origin
的值為“*”,請求將會失敗。而將 Access-Control-Allow-Origin 的值設置為
http://foo.example,則請求將成功執行。
來自mdn
所以把*改成你的請求域名http://localhost:8080
如果還不行,
Access-Control-Allow-Credentials: true
Access-Control-Allow-Methods: *
這些字段也都設置一下
(注意:對于附帶身份憑證的請求,Access-Control-Allow-Methods 的“*”不會被當作通配符,需要明確列出方法,例如 GET, POST, OPTIONS。)
type H map[string]interface{}
不管是gin還是echo,Json的第二個參數都是interface,你就自己將 map[string]interface{} 定義成 H 就可以了。
這只取決于你服務端的響應。在這頁面打開開發者工具,然后換成你這個segmentfault問題的url,你就看到和教程相似的結果。或者點開你有問題的頁面,點開Network,你也能明白為什么你只有content-type。
在Public目錄下添加.htaccess文件,內容如下:
deny from all
你的while循環里的賦值有問題,一直在重復為同一個鍵名賦值。
正確的應該是:
$data[] = [
'uid' => $row["uid"],
'pcontent' => $row["pcontent"],
'src' => explode(",",$row["ppic"]),
'pdate' => $row["pdate"],
'pweek' => $row["pweek"],
'pweather' => $row["pweather"]
];
這種形式;
不是很理解你的描述,不過你可以了解一下“find_in_set”函數,也許可以幫到你
提示不是很明顯了嗎,提示你對象不存在,那就是在leancloud后臺有沒有創建對象表,如果創建了,名稱寫對了嗎?
同樣問題,不知道在哪設(shè)置
flask view里面調(diào)用subprocess.Popen去執(zhí)行腳本,subprocess會生成一個子進程去執(zhí)行腳本,可以立即返回
你貼出來的這一塊代碼看起來好像沒問題,報的是哪一行錯誤?先確保data數據沒有錯誤,在確保這些沒有錯誤的情況下還有一種寫法:
$create['title'] = $data['title'];
$create['describes'] = $data['introduce'];
$res = Coupon::create($create);
或者寫成$res = Coupon::insert($create);
試試這個:
if (isset($GLOBALS['HTTP_RAW_POST_DATA'])) {
$final = $GLOBALS['HTTP_RAW_POST_DATA'];
} else {
$final = file_get_contents('php://input');
}
需要前端獲取到后傳過來
// Demo input: a flat list of resource URLs spread over two hosts.
$urlArr = array(
'https://172.16.3.33/resource/js/jquery.min.js',
'https://172.16.3.33/resource/js/unit.js',
'https://172.16.3.33/resource/js/dialog/jquery.artDialog.source.js',
'https://172.16.3.33/test.js',
'https://172.16.3.13/resource/xigua/hello.php',
);
$urlTree = arrangeUrlTree($urlArr); // build the nested host/directory tree
// Dump the resulting tree in a readable form.
echo "<pre>";
print_r($urlTree);
/**
 * Group a flat list of URLs into a nested tree.
 *
 * The first level is keyed by "scheme://host".  Below it, every directory of
 * the URL path becomes a nested array keyed by the directory name, and the
 * last path segment (the file) is appended to its directory's array as a
 * plain value.
 *
 * @param array $arr List of absolute URL strings.
 * @return array The tree; an empty array when $arr is empty.  URLs that
 *               cannot be parsed or lack a scheme/host are skipped.
 */
function arrangeUrlTree($arr){
    if(!$arr){
        return array();
    }
    $returnArr = array();
    foreach($arr as $v){
        $url = parse_url($v); // split the URL into its components
        if($url === false || !isset($url['scheme'], $url['host'])){
            continue; // not an absolute URL - nothing to group it under
        }
        $hostname = $url['scheme'].'://'.$url['host'];
        if(!isset($returnArr[$hostname])){
            $returnArr[$hostname] = array();
        }
        $path = isset($url['path']) ? $url['path'] : '';
        // Drop empty segments only; array_filter() without a callback would
        // also drop a segment named "0".  array_values() reindexes from 0.
        $pathList = array_values(array_filter(
            explode('/', $path),
            function($segment){ return $segment !== ''; }
        ));
        $lastIndex = count($pathList) - 1;
        $tmpArr = &$returnArr[$hostname]; // cursor into the tree being built
        foreach($pathList as $k2 => $v2){
            if($k2 == $lastIndex){
                // Last segment: the file itself.
                $tmpArr[] = $v2;
            }else{
                // Intermediate segment: a directory; create it and descend.
                if(!isset($tmpArr[$v2])){
                    $tmpArr[$v2] = array();
                }
                $tmpArr = &$tmpArr[$v2];
            }
        }
        unset($tmpArr); // break the reference before the next URL
    }
    return $returnArr;
}
mysqli_set_charset需要兩個參數,但是你只寫了一個
mysqli_set_charset('utf-8');
兩種模式不要混用
改為
$num = $conn->num_rows;
北大青鳥APTECH成立于1999年。依托北京大學優質雄厚的教育資源和背景,秉承“教育改變生活”的發展理念,致力于培養中國IT技能型緊缺人才,是大數據專業的國家
北大青鳥中博軟件學院創立于2003年,作為華東區著名互聯網學院和江蘇省首批服務外包人才培訓基地,中博成功培育了近30000名軟件工程師走向高薪崗位,合作企業超4
中公教育集團創建于1999年,經過二十年潛心發展,已由一家北大畢業生自主創業的信息技術與教育服務機構,發展為教育服務業的綜合性企業集團,成為集合面授教學培訓、網
達內教育集團成立于2002年,是一家由留學海歸創辦的高端職業教育培訓機構,是中國一站式人才培養平臺、一站式人才輸送平臺。2014年4月3日在美國成功上市,融資1
曾工作于聯想擔任系統開發工程師,曾在博彥科技股份有限公司擔任項目經理從事移動互聯網管理及研發工作,曾創辦藍懿科技有限責任公司從事總經理職務負責iOS教學及管理工作。
浪潮集團項目經理。精通Java與.NET 技術, 熟練的跨平臺面向對象開發經驗,技術功底深厚。 授課風格 授課風格清新自然、條理清晰、主次分明、重點難點突出、引人入勝。
精通HTML5和CSS3;Javascript及主流js庫,具有快速界面開發的能力,對瀏覽器兼容性、前端性能優化等有深入理解。精通網頁制作和網頁游戲開發。
具有10 年的Java 企業應用開發經驗。曾經歷任德國Software AG 技術顧問,美國Dachieve 系統架構師,美國AngelEngineers Inc. 系統架構師。